novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -6,6 +6,8 @@ novel_downloader.utils.i18n
6
6
  Multilingual text dictionary and utility for CLI and interactive mode.
7
7
  """
8
8
 
9
+ __all__ = ["t"]
10
+
9
11
  import json
10
12
  from typing import Any
11
13
 
@@ -7,6 +7,8 @@ Provides a configurable logging setup for Python applications.
7
7
  Log files are rotated daily and named with the given logger name and current date.
8
8
  """
9
9
 
10
+ __all__ = ["setup_logging"]
11
+
10
12
  import logging
11
13
  from datetime import datetime
12
14
  from logging.handlers import TimedRotatingFileHandler
@@ -6,295 +6,154 @@ novel_downloader.utils.network
6
6
  Utilities for handling HTTP requests and downloading remote resources.
7
7
  """
8
8
 
9
+ __all__ = ["download"]
10
+
9
11
  import logging
10
- import random
11
- import time
12
12
  from pathlib import Path
13
13
  from typing import Literal
14
14
  from urllib.parse import unquote, urlparse
15
15
 
16
16
  import requests
17
+ from requests.adapters import HTTPAdapter
18
+ from urllib3.util.retry import Retry
17
19
 
18
- from .constants import DEFAULT_HEADERS, DEFAULT_IMAGE_SUFFIX
20
+ from .constants import DEFAULT_HEADERS
21
+ from .file_utils import sanitize_filename
19
22
  from .file_utils.io import _get_non_conflicting_path, _write_file
20
23
 
21
24
  logger = logging.getLogger(__name__)
22
-
23
25
  _DEFAULT_CHUNK_SIZE = 8192 # 8KB per chunk for streaming downloads
24
26
 
25
27
 
26
- def http_get_with_retry(
27
- url: str,
28
- *,
29
- retries: int = 3,
30
- timeout: int = 10,
31
- backoff: float = 0.5,
32
- headers: dict[str, str] | None = None,
33
- stream: bool = False,
34
- ) -> requests.Response | None:
28
+ def _normalize_url(url: str) -> str:
35
29
  """
36
- Perform a GET request with retry support.
37
-
38
- :param url: URL to request.
39
- :param retries: Number of retry attempts.
40
- :param timeout: Timeout in seconds per request.
41
- :param backoff: Base backoff delay between retries.
42
- :param headers: Optional HTTP headers.
43
- :param stream: Whether to stream the response.
44
- :return: Response object if successful, else None.
30
+ Ensure URL has scheme, defaulting to https:// if missing.
45
31
  """
46
- for attempt in range(1, retries + 1):
47
- try:
48
- response = requests.get(
49
- url, timeout=timeout, headers=headers, stream=stream
50
- )
51
- response.raise_for_status()
52
- return response
53
- except requests.RequestException as e:
54
- logger.warning("[http] Attempt %s/%s failed: %s", attempt, retries, e)
55
- if attempt < retries:
56
- sleep_time = backoff * (2 ** (attempt - 1)) + random.uniform(0, 0.1)
57
- time.sleep(sleep_time)
58
- except Exception as e:
59
- logger.error("[http] Unexpected error: %s", e)
60
- break
61
-
62
- logger.error("[http] Failed after %s attempts: %s", retries, url)
63
- return None
64
-
32
+ if url.startswith("//"):
33
+ return "https:" + url
34
+ if not url.startswith(("http://", "https://")):
35
+ return "https://" + url
36
+ return url
65
37
 
66
- def image_url_to_filename(url: str) -> str:
67
- """
68
- Parse and sanitize a image filename from a URL.
69
- If no filename or suffix exists, fallback to default name and extension.
70
38
 
71
- :param url: URL string
72
- :return: Safe filename string
73
- """
39
+ def _build_filepath(
40
+ folder: Path,
41
+ url: str,
42
+ filename: str | None,
43
+ default_suffix: str,
44
+ on_exist: Literal["overwrite", "skip", "rename"],
45
+ ) -> Path:
74
46
  parsed_url = urlparse(url)
75
- path = unquote(parsed_url.path)
76
- filename = Path(path).name
77
-
78
- if not filename:
79
- filename = "image"
80
-
81
- if not Path(filename).suffix:
82
- filename += DEFAULT_IMAGE_SUFFIX
83
-
84
- return filename
47
+ url_path = Path(unquote(parsed_url.path))
48
+
49
+ raw_name = filename or url_path.name or "unnamed"
50
+ name = sanitize_filename(raw_name)
51
+ suffix = default_suffix or url_path.suffix
52
+ if suffix and not suffix.startswith("."):
53
+ suffix = "." + suffix
54
+
55
+ file_path = folder / name
56
+ if not file_path.suffix and suffix:
57
+ file_path = file_path.with_suffix(suffix)
58
+
59
+ if on_exist == "rename":
60
+ file_path = _get_non_conflicting_path(file_path)
61
+ return file_path
62
+
63
+
64
+ def _make_session(
65
+ retries: int,
66
+ backoff: float,
67
+ headers: dict[str, str] | None,
68
+ ) -> requests.Session:
69
+ session = requests.Session()
70
+ session.headers.update(headers or DEFAULT_HEADERS)
71
+
72
+ retry = Retry(
73
+ total=retries,
74
+ backoff_factor=backoff,
75
+ status_forcelist=[429, 500, 502, 503, 504],
76
+ allowed_methods={"GET", "HEAD", "OPTIONS"},
77
+ )
78
+ adapter = HTTPAdapter(max_retries=retry)
79
+ session.mount("https://", adapter)
80
+ session.mount("http://", adapter)
81
+ return session
85
82
 
86
83
 
87
- def download_image(
84
+ def download(
88
85
  url: str,
89
- target_folder: str | Path | None = None,
90
- target_name: str | None = None,
86
+ target_dir: str | Path | None = None,
87
+ filename: str | None = None,
91
88
  *,
92
89
  timeout: int = 10,
93
90
  retries: int = 3,
94
91
  backoff: float = 0.5,
95
92
  headers: dict[str, str] | None = None,
93
+ stream: bool = False,
96
94
  on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
95
+ default_suffix: str = "",
96
+ chunk_size: int = _DEFAULT_CHUNK_SIZE,
97
97
  ) -> Path | None:
98
98
  """
99
- Download an image from `url` and save it to `target_folder`, returning the Path.
100
- Can override the filename via `target_name`.
101
-
102
- :param url: Image URL. Can start with 'http', '//', or without protocol.
103
- :param target_folder: Directory to save into (defaults to cwd).
104
- :param target_name: Optional filename (with or without extension).
105
- :param timeout: Request timeout in seconds.
106
- :param retries: Number of retry attempts.
107
- :param backoff: Base delay between retries (exponential backoff).
108
- :param on_exist: What to do if file exists: 'overwrite', 'skip', or 'rename'.
109
- :return: Path to the saved image, or `None` on any failure.
99
+ Download a URL to disk, with retries, optional rename/skip, and cleanup on failure.
100
+
101
+ :param url: the file URL.
102
+ :param target_dir: directory to save into.
103
+ :param filename: override the basename (else from URL path).
104
+ :param timeout: per-request timeout.
105
+ :param retries: GET retry count.
106
+ :param backoff: exponential backoff base.
107
+ :param headers: optional headers.
108
+ :param stream: Whether to stream the response.
109
+ :param on_exist: if 'skip', return filepath; if 'rename', auto-rename.
110
+ :param default_suffix: used if no suffix in URL or filename.
111
+ :param chunk_size: streaming chunk size.
112
+ :return: path to the downloaded file.
110
113
  """
111
- # Normalize URL
112
- if url.startswith("//"):
113
- url = "https:" + url
114
- elif not url.startswith("http"):
115
- url = "https://" + url
114
+ url = _normalize_url(url)
116
115
 
117
- folder = Path(target_folder) if target_folder else Path.cwd()
116
+ folder = Path(target_dir) if target_dir else Path.cwd()
118
117
  folder.mkdir(parents=True, exist_ok=True)
119
118
 
120
- if target_name:
121
- name = target_name
122
- if not Path(name).suffix:
123
- # infer ext from URL-derived name
124
- name += Path(image_url_to_filename(url)).suffix
125
- else:
126
- name = image_url_to_filename(url)
127
- save_path = folder / name
128
-
129
- # Handle existing file
130
- if save_path.exists():
131
- if on_exist == "skip":
132
- logger.debug("Skipping download; file exists: %s", save_path)
133
- return save_path
134
- if on_exist == "rename":
135
- save_path = _get_non_conflicting_path(save_path)
136
-
137
- # Proceed with download
138
- resp = http_get_with_retry(
139
- url,
140
- retries=retries,
141
- timeout=timeout,
142
- backoff=backoff,
143
- headers=headers or DEFAULT_HEADERS,
144
- stream=False,
145
- )
146
-
147
- if not (resp and resp.ok):
148
- logger.warning(
149
- "Failed to download %s (status=%s)",
150
- url,
151
- getattr(resp, "status_code", None),
152
- )
153
- return None
154
-
155
- # Write to disk
156
- try:
157
- _write_file(
158
- content=resp.content,
159
- filepath=save_path,
160
- mode="wb",
161
- on_exist=on_exist,
162
- )
163
- return save_path
164
- except Exception:
165
- logger.exception("Error saving image to %s", save_path)
166
- return None
167
-
168
-
169
- def download_font_file(
170
- url: str,
171
- target_folder: str | Path,
172
- *,
173
- timeout: int = 10,
174
- retries: int = 3,
175
- backoff: float = 0.5,
176
- on_exist: Literal["overwrite", "skip", "rename"] = "skip",
177
- ) -> Path | None:
178
- """
179
- Download a font file from a URL and save it locally with retry and overwrite control
180
-
181
- :param url: Fully-qualified font file URL.
182
- :param target_folder: Local folder to save the font file.
183
- :param timeout: Timeout for each request (in seconds).
184
- :param retries: Number of retry attempts.
185
- :param backoff: Base backoff time between retries (in seconds).
186
- :param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
187
- :return: Path to the saved font file, or None if failed.
188
- """
189
- # Validate and parse URL
190
- parsed = urlparse(url)
191
- if not parsed.scheme or not parsed.netloc:
192
- logger.warning("[font] Invalid URL: %s", url)
193
- return None
194
-
195
- # Determine filename
196
- filename = Path(unquote(parsed.path)).name
197
- if not filename:
198
- logger.warning("[font] Could not extract filename from URL: %s", url)
199
- return None
200
-
201
- # Resolve save path
202
- target_folder = Path(target_folder)
203
- target_folder.mkdir(parents=True, exist_ok=True)
204
- font_path = target_folder / filename
205
-
206
- # If skip and file exists -> return immediately
207
- if on_exist == "skip" and font_path.exists():
208
- logger.debug("[font] File exists, skipping download: %s", font_path)
209
- return font_path
210
-
211
- # Retry download with exponential backoff
212
- response = http_get_with_retry(
119
+ save_path = _build_filepath(
120
+ folder,
213
121
  url,
214
- retries=retries,
215
- timeout=timeout,
216
- backoff=backoff,
217
- headers=DEFAULT_HEADERS,
218
- stream=True,
122
+ filename,
123
+ default_suffix,
124
+ on_exist,
219
125
  )
220
126
 
221
- if response:
222
- try:
223
- if on_exist == "rename":
224
- font_path = _get_non_conflicting_path(font_path)
225
-
226
- with open(font_path, "wb") as f:
227
- for chunk in response.iter_content(chunk_size=_DEFAULT_CHUNK_SIZE):
228
- if chunk:
229
- f.write(chunk)
230
-
231
- logger.debug("[font] Font saved to: %s", font_path)
232
- return font_path
233
-
234
- except Exception as e:
235
- logger.error("[font] Error writing font to disk: %s", e)
236
-
237
- return None
238
-
239
-
240
- def download_js_file(
241
- url: str,
242
- target_folder: str | Path,
243
- *,
244
- timeout: int = 10,
245
- retries: int = 3,
246
- backoff: float = 0.5,
247
- on_exist: Literal["overwrite", "skip", "rename"] = "skip",
248
- ) -> Path | None:
249
- """
250
- Download a JavaScript (.js) file from a URL and save it locally.
251
-
252
- :param url: Fully-qualified JS file URL.
253
- :param target_folder: Local folder to save the JS file.
254
- :param timeout: Timeout for each request (in seconds).
255
- :param retries: Number of retry attempts.
256
- :param backoff: Base backoff time between retries (in seconds).
257
- :param on_exist: File conflict strategy: 'overwrite', 'skip', or 'rename'.
258
- :return: Path to the saved JS file, or None if failed.
259
- """
260
- parsed = urlparse(url)
261
- if not parsed.scheme or not parsed.netloc:
262
- logger.warning("[js] Invalid URL: %s", url)
263
- return None
264
-
265
- # Determine filename
266
- filename = Path(unquote(parsed.path)).name
267
- if not filename.endswith(".js"):
268
- filename += ".js"
269
-
270
- target_folder = Path(target_folder)
271
- target_folder.mkdir(parents=True, exist_ok=True)
272
- save_path = target_folder / filename
273
-
274
- if on_exist == "skip" and save_path.exists():
275
- logger.debug("[js] File exists, skipping download: %s", save_path)
127
+ # Handle existing file
128
+ if save_path.exists() and on_exist == "skip":
129
+ logger.debug("Skipping download; file exists: %s", save_path)
276
130
  return save_path
277
131
 
278
- response = http_get_with_retry(
279
- url,
280
- retries=retries,
281
- timeout=timeout,
282
- backoff=backoff,
283
- headers=DEFAULT_HEADERS,
284
- stream=False,
285
- )
286
-
287
- if response and response.ok:
288
- content = response.content
289
-
290
- if on_exist == "rename":
291
- save_path = _get_non_conflicting_path(save_path)
292
-
132
+ with _make_session(retries, backoff, headers) as session:
293
133
  try:
294
- _write_file(content=content, filepath=save_path, mode="wb")
295
- logger.debug("[js] JS file saved to: %s", save_path)
296
- return save_path
134
+ resp = session.get(url, timeout=timeout, stream=stream)
135
+ resp.raise_for_status()
297
136
  except Exception as e:
298
- logger.error("[js] Error writing JS to disk: %s", e)
299
-
137
+ logger.warning("[download] request failed: %s", e)
138
+ return None
139
+
140
+ # Write to disk
141
+ if stream:
142
+ try:
143
+ with open(save_path, "wb") as f:
144
+ for chunk in resp.iter_content(chunk_size=chunk_size):
145
+ if chunk:
146
+ f.write(chunk)
147
+ return save_path
148
+ except Exception as e:
149
+ logger.warning("[download] write failed: %s", e)
150
+ save_path.unlink(missing_ok=True)
151
+ return None
152
+ else:
153
+ return _write_file(
154
+ content=resp.content,
155
+ filepath=save_path,
156
+ write_mode="wb",
157
+ on_exist=on_exist,
158
+ )
300
159
  return None
@@ -8,6 +8,7 @@ Supported sections:
8
8
  - general: global preferences (e.g. language)
9
9
  - sites: per-site flags & data (e.g. manual_login, cookies)
10
10
  """
11
+
11
12
  import json
12
13
  from pathlib import Path
13
14
  from typing import Any
@@ -6,28 +6,29 @@ novel_downloader.utils.text_utils
6
6
  Utility modules for text formatting, font mapping, cleaning, and diff display.
7
7
 
8
8
  Submodules:
9
- - font_mapping: Replace obfuscated characters using font maps
10
- - chapter_formatting: Build structured chapter strings from raw content
11
- - text_cleaning: Remove promo text and check for spam lines
12
9
  - diff_display: Generate inline diffs with aligned character markers
10
+ - numeric_conversion: Convert between Chinese and Arabic numerals
11
+ - text_cleaner: Text cleaning and normalization utilities
12
+ - truncate_utils: Text truncation and content prefix generation
13
13
  """
14
14
 
15
- from .chapter_formatting import format_chapter
16
- from .diff_display import diff_inline_display
17
- from .font_mapping import apply_font_mapping
18
- from .text_cleaning import (
19
- clean_chapter_title,
20
- content_prefix,
21
- is_promotional_line,
22
- truncate_half_lines,
23
- )
24
-
25
15
  __all__ = [
26
- "apply_font_mapping",
27
- "format_chapter",
28
- "clean_chapter_title",
29
- "is_promotional_line",
16
+ "TextCleaner",
17
+ "get_cleaner",
30
18
  "content_prefix",
31
19
  "truncate_half_lines",
20
+ "chinese_to_arabic",
21
+ "arabic_to_chinese",
32
22
  "diff_inline_display",
33
23
  ]
24
+
25
+ from .diff_display import diff_inline_display
26
+ from .numeric_conversion import (
27
+ arabic_to_chinese,
28
+ chinese_to_arabic,
29
+ )
30
+ from .text_cleaner import TextCleaner, get_cleaner
31
+ from .truncate_utils import (
32
+ content_prefix,
33
+ truncate_half_lines,
34
+ )
@@ -6,6 +6,10 @@ novel_downloader.utils.text_utils.diff_display
6
6
  Generate inline character-level diff between two strings with visual markers.
7
7
  """
8
8
 
9
+ __all__ = [
10
+ "diff_inline_display",
11
+ ]
12
+
9
13
  import difflib
10
14
  import unicodedata
11
15
 
@@ -67,8 +71,3 @@ def diff_inline_display(old_str: str, new_str: str) -> str:
67
71
  marker_s2 += "".join(_char_width_space(c, mark_1, mark_2) for c in s2_seg)
68
72
  output_str = f"-{s1}\n {marker_s1}\n+{s2}\n {marker_s2}"
69
73
  return output_str
70
-
71
-
72
- __all__ = [
73
- "diff_inline_display",
74
- ]