novel-downloader 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +3 -3
  3. novel_downloader/cli/export.py +1 -1
  4. novel_downloader/cli/ui.py +7 -7
  5. novel_downloader/config/adapter.py +191 -154
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/exporters/common/txt.py +9 -9
  8. novel_downloader/core/exporters/linovelib/txt.py +9 -9
  9. novel_downloader/core/fetchers/qidian.py +20 -35
  10. novel_downloader/core/interfaces/fetcher.py +2 -2
  11. novel_downloader/core/interfaces/parser.py +2 -2
  12. novel_downloader/core/parsers/base.py +1 -0
  13. novel_downloader/core/parsers/eightnovel.py +2 -2
  14. novel_downloader/core/parsers/esjzone.py +3 -3
  15. novel_downloader/core/parsers/qidian/main_parser.py +747 -12
  16. novel_downloader/core/parsers/qidian/utils/__init__.py +2 -21
  17. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  18. novel_downloader/core/parsers/xiguashuwu.py +6 -12
  19. novel_downloader/locales/en.json +3 -3
  20. novel_downloader/locales/zh.json +3 -3
  21. novel_downloader/utils/__init__.py +0 -2
  22. novel_downloader/utils/chapter_storage.py +2 -3
  23. novel_downloader/utils/constants.py +1 -3
  24. novel_downloader/utils/cookies.py +32 -17
  25. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  26. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  27. novel_downloader/utils/epub/__init__.py +2 -3
  28. novel_downloader/utils/epub/builder.py +6 -6
  29. novel_downloader/utils/epub/constants.py +5 -5
  30. novel_downloader/utils/epub/documents.py +7 -7
  31. novel_downloader/utils/epub/models.py +8 -8
  32. novel_downloader/utils/epub/utils.py +10 -10
  33. novel_downloader/utils/file_utils/io.py +48 -73
  34. novel_downloader/utils/file_utils/normalize.py +1 -7
  35. novel_downloader/utils/file_utils/sanitize.py +4 -11
  36. novel_downloader/utils/fontocr/__init__.py +13 -0
  37. novel_downloader/utils/{fontocr.py → fontocr/core.py} +70 -61
  38. novel_downloader/utils/fontocr/loader.py +50 -0
  39. novel_downloader/utils/logger.py +80 -56
  40. novel_downloader/utils/network.py +16 -40
  41. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  42. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  43. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  44. novel_downloader/web/main.py +1 -1
  45. novel_downloader/web/pages/search.py +3 -3
  46. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/METADATA +2 -1
  47. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/RECORD +51 -55
  48. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  49. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  50. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  51. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  52. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  53. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  54. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  55. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +0 -0
  56. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  57. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -14,8 +14,6 @@ __all__ = ["normalize_txt_line_endings"]
14
14
  import logging
15
15
  from pathlib import Path
16
16
 
17
- logger = logging.getLogger(__name__)
18
-
19
17
 
20
18
  def normalize_txt_line_endings(folder_path: str | Path) -> None:
21
19
  """
@@ -28,7 +26,6 @@ def normalize_txt_line_endings(folder_path: str | Path) -> None:
28
26
  """
29
27
  path = Path(folder_path).resolve()
30
28
  if not path.exists() or not path.is_dir():
31
- logger.warning("[file] Invalid folder: %s", path)
32
29
  return
33
30
 
34
31
  count_success, count_fail = 0, 0
@@ -38,13 +35,10 @@ def normalize_txt_line_endings(folder_path: str | Path) -> None:
38
35
  content = txt_file.read_text(encoding="utf-8")
39
36
  normalized = content.replace("\r\n", "\n").replace("\r", "\n")
40
37
  txt_file.write_text(normalized, encoding="utf-8", newline="\n")
41
- logger.debug("[file] Normalized: %s", txt_file)
42
38
  count_success += 1
43
- except (OSError, UnicodeDecodeError) as e:
44
- logger.warning("[file] Failed: %s | %s", txt_file, e)
39
+ except (OSError, UnicodeDecodeError):
45
40
  count_fail += 1
46
41
 
47
- logger.info("[file] Completed. Success: %s, Failed: %s", count_success, count_fail)
48
42
  return
49
43
 
50
44
 
@@ -9,13 +9,9 @@ on different operating systems.
9
9
 
10
10
  __all__ = ["sanitize_filename"]
11
11
 
12
- import logging
13
12
  import os
14
13
  import re
15
14
 
16
- logger = logging.getLogger(__name__)
17
-
18
- # Windows 保留名称列表 (忽略大小写)
19
15
  _WIN_RESERVED_NAMES = {
20
16
  "CON",
21
17
  "PRN",
@@ -36,8 +32,8 @@ def sanitize_filename(filename: str, max_length: int | None = 255) -> str:
36
32
 
37
33
  This function checks the operating system environment and applies the appropriate
38
34
  filtering rules:
39
- - On Windows, it replaces characters: <>:"/\\|?*
40
- - On POSIX systems, it replaces the forward slash '/'
35
+ * On Windows, it replaces characters: <>:"/\\|?*
36
+ * On POSIX systems, it replaces the forward slash '/'
41
37
 
42
38
  :param filename: The input filename to sanitize.
43
39
  :param max_length: Optional maximum length of the output filename. Defaults to 255.
@@ -47,7 +43,7 @@ def sanitize_filename(filename: str, max_length: int | None = 255) -> str:
47
43
 
48
44
  name = pattern.sub("_", filename).strip(" .")
49
45
 
50
- stem, dot, ext = name.partition(".")
46
+ stem, dot, ext = name.rpartition(".")
51
47
  if os.name == "nt" and stem.upper() in _WIN_RESERVED_NAMES:
52
48
  stem = f"_{stem}"
53
49
  cleaned = f"{stem}{dot}{ext}" if ext else stem
@@ -59,7 +55,4 @@ def sanitize_filename(filename: str, max_length: int | None = 255) -> str:
59
55
  else:
60
56
  cleaned = cleaned[:max_length]
61
57
 
62
- if not cleaned:
63
- cleaned = "_untitled"
64
- logger.debug("[file] Sanitized filename: %r -> %r", filename, cleaned)
65
- return cleaned
58
+ return cleaned or "_untitled"
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.fontocr
4
+ ------------------------------
5
+
6
+ Lazy-loading interface for FontOCR. Provides a safe entry point
7
+ to obtain an OCR utility instance if optional dependencies are available.
8
+ """
9
+
10
+ __all__ = ["get_font_ocr"]
11
+ __version__ = "4.0"
12
+
13
+ from .loader import get_font_ocr
@@ -1,28 +1,23 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.utils.fontocr
4
- ------------------------------
3
+ novel_downloader.utils.fontocr.core
4
+ -----------------------------------
5
5
 
6
6
  This class provides utility methods for optical character recognition (OCR),
7
7
  primarily used for decrypting custom font encryption.
8
8
  """
9
9
 
10
- __all__ = [
11
- "FontOCR",
12
- "get_font_ocr",
13
- ]
14
- __version__ = "4.0"
15
-
10
+ import io
16
11
  import logging
17
- from collections.abc import Generator
18
- from typing import Any, TypeVar
12
+ from pathlib import Path
13
+ from typing import Any
19
14
 
20
15
  import numpy as np
21
- from paddleocr import TextRecognition # takes 5 ~ 12 sec to init
16
+ from fontTools.ttLib import TTFont
17
+ from paddleocr import TextRecognition
22
18
  from PIL import Image, ImageDraw, ImageFont
23
19
  from PIL.Image import Transpose
24
20
 
25
- T = TypeVar("T")
26
21
  logger = logging.getLogger(__name__)
27
22
 
28
23
 
@@ -39,22 +34,20 @@ class FontOCR:
39
34
  device: str | None = None,
40
35
  precision: str = "fp32",
41
36
  cpu_threads: int = 10,
42
- batch_size: int = 32,
43
- threshold: float = 0.0,
44
37
  **kwargs: Any,
45
38
  ) -> None:
46
39
  """
47
40
  Initialize a FontOCR instance.
48
41
 
49
- :param batch_size: batch size for OCR inference (minimum 1)
50
- :param ocr_weight: weight factor for OCR-based prediction scores
51
- :param vec_weight: weight factor for vector-based similarity scores
52
- :param threshold: minimum confidence threshold for predictions [0.0-1.0]
42
+ :param model_name: If set to None, PP-OCRv5_server_rec is used.
43
+ :param model_dir: Model storage path.
44
+ :param input_shape: Input image size for the model in the format (C, H, W).
45
+ :param device: Device for inference.
46
+ :param precision: Precision for TensorRT.
47
+ :param cpu_threads: Number of threads to use for inference on CPUs.
53
48
  :param kwargs: reserved for future extensions
54
49
  """
55
- self._batch_size = batch_size
56
- self._threshold = threshold
57
- self._ocr_model = TextRecognition(
50
+ self._ocr_model = TextRecognition( # takes 5 ~ 12 sec to init
58
51
  model_name=model_name,
59
52
  model_dir=model_dir,
60
53
  input_shape=input_shape,
@@ -66,18 +59,18 @@ class FontOCR:
66
59
  def predict(
67
60
  self,
68
61
  images: list[np.ndarray],
69
- top_k: int = 1,
70
- ) -> list[list[tuple[str, float]]]:
62
+ batch_size: int = 1,
63
+ ) -> list[tuple[str, float]]:
71
64
  """
72
65
  Run OCR on input images.
73
66
 
74
67
  :param images: list of np.ndarray objects to predict
75
- :param top_k: number of top candidates to return per image
76
- :return: list of lists containing (character, score)
68
+ :param batch_size: batch size for OCR inference (minimum 1)
69
+ :return: list of tuple containing (character, score)
77
70
  """
78
71
  return [
79
- [(pred.get("rec_text"), pred.get("rec_score"))]
80
- for pred in self._ocr_model.predict(images, batch_size=self._batch_size)
72
+ (pred.get("rec_text"), pred.get("rec_score"))
73
+ for pred in self._ocr_model.predict(images, batch_size=batch_size)
81
74
  ]
82
75
 
83
76
  @staticmethod
@@ -86,7 +79,7 @@ class FontOCR:
86
79
  render_font: ImageFont.FreeTypeFont,
87
80
  is_reflect: bool = False,
88
81
  size: int = 64,
89
- ) -> Image.Image | None:
82
+ ) -> Image.Image:
90
83
  """
91
84
  Render a single character into an RGB square image.
92
85
 
@@ -107,10 +100,6 @@ class FontOCR:
107
100
  if is_reflect:
108
101
  img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
109
102
 
110
- img_np = np.array(img)
111
- if np.unique(img_np).size == 1:
112
- return None
113
-
114
103
  return img
115
104
 
116
105
  @staticmethod
@@ -119,7 +108,7 @@ class FontOCR:
119
108
  render_font: ImageFont.FreeTypeFont,
120
109
  is_reflect: bool = False,
121
110
  size: int = 64,
122
- ) -> np.ndarray | None:
111
+ ) -> np.ndarray:
123
112
  """
124
113
  Render a single character into an RGB square image.
125
114
 
@@ -140,11 +129,7 @@ class FontOCR:
140
129
  if is_reflect:
141
130
  img = img.transpose(Transpose.FLIP_LEFT_RIGHT)
142
131
 
143
- img_np = np.array(img)
144
- if np.unique(img_np).size == 1:
145
- return None
146
-
147
- return img_np
132
+ return np.array(img)
148
133
 
149
134
  @staticmethod
150
135
  def render_text_image(
@@ -176,32 +161,56 @@ class FontOCR:
176
161
  return img
177
162
 
178
163
  @staticmethod
179
- def _chunked(seq: list[T], size: int) -> Generator[list[T], None, None]:
164
+ def load_image_array_from_bytes(data: bytes) -> np.ndarray:
180
165
  """
181
- Yield successive chunks of `seq` of length `size`.
166
+ Decode image bytes into an RGB NumPy array.
167
+
168
+ Reads common image formats (e.g. PNG/JPEG/WebP) from an
169
+ in-memory byte buffer using Pillow, converts the image to RGB,
170
+ and returns a NumPy array suitable for OCR inference.
171
+
172
+ :param data: Image file content as raw bytes.
173
+ :return: NumPy array of shape (H, W, 3), dtype=uint8, in RGB order.
174
+ :raises PIL.UnidentifiedImageError, OSError: If input bytes cannot be decoded.
182
175
  """
183
- for i in range(0, len(seq), size):
184
- yield seq[i : i + size]
176
+ with Image.open(io.BytesIO(data)) as im:
177
+ im = im.convert("RGB")
178
+ return np.asarray(im)
185
179
 
180
+ @staticmethod
181
+ def load_render_font(
182
+ font_path: Path | str, char_size: int = 52
183
+ ) -> ImageFont.FreeTypeFont:
184
+ """
185
+ Load a FreeType font face at the given pixel size for rendering helpers.
186
186
 
187
- _font_ocr: FontOCR | None = None
187
+ :param font_path: Path to a TTF/OTF font file.
188
+ :param char_size: Target glyph size in pixels (e.g. 52).
189
+ :return: A PIL `ImageFont.FreeTypeFont` instance.
190
+ :raises OSError: If the font file cannot be opened by PIL.
191
+ """
192
+ return ImageFont.truetype(str(font_path), char_size)
188
193
 
194
+ @staticmethod
195
+ def extract_font_charset(font_path: Path | str) -> set[str]:
196
+ """
197
+ Extract the set of Unicode characters encoded by a TrueType/OpenType font.
189
198
 
190
- def get_font_ocr(
191
- model_name: str | None = None,
192
- model_dir: str | None = None,
193
- input_shape: tuple[int, int, int] | None = None,
194
- batch_size: int = 32,
195
- ) -> FontOCR:
196
- """
197
- Return the singleton FontOCR, initializing it on first use.
198
- """
199
- global _font_ocr
200
- if _font_ocr is None:
201
- _font_ocr = FontOCR(
202
- model_name=model_name,
203
- model_dir=model_dir,
204
- input_shape=input_shape,
205
- batch_size=batch_size,
206
- )
207
- return _font_ocr
199
+ This reads the font's best available character map (cmap) and returns the
200
+ corresponding set of characters.
201
+
202
+ :param font_path: Path to a TTF/OTF font file.
203
+ :return: A set of Unicode characters present in the font's cmap.
204
+ """
205
+ with TTFont(font_path) as font_ttf:
206
+ cmap = font_ttf.getBestCmap() or {}
207
+
208
+ charset: set[str] = set()
209
+ for cp in cmap:
210
+ # guard against invalid/surrogate code points
211
+ if 0 <= cp <= 0x10FFFF and not (0xD800 <= cp <= 0xDFFF):
212
+ try:
213
+ charset.add(chr(cp))
214
+ except ValueError:
215
+ continue
216
+ return charset
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.fontocr.loader
4
+ -------------------------------------
5
+
6
+ Lazily load the FontOCR class.
7
+ """
8
+
9
+ import logging
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from .core import FontOCR
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ _FONT_OCR: "FontOCR | None" = None
18
+
19
+
20
+ def get_font_ocr(
21
+ model_name: str | None = None,
22
+ model_dir: str | None = None,
23
+ input_shape: tuple[int, int, int] | None = None,
24
+ ) -> "FontOCR | None":
25
+ """
26
+ Try to initialize and return a singleton FontOCR instance.
27
+ Returns None if FontOCR or its dependencies are not available.
28
+ """
29
+ global _FONT_OCR
30
+ if _FONT_OCR is None:
31
+ try:
32
+ from .core import FontOCR
33
+
34
+ _FONT_OCR = FontOCR(
35
+ model_name=model_name,
36
+ model_dir=model_dir,
37
+ input_shape=input_shape,
38
+ )
39
+ except ImportError:
40
+ logger.warning(
41
+ "FontOCR dependency not available "
42
+ "(paddleocr / numpy / pillow / fonttools). "
43
+ "Font decoding will be skipped."
44
+ )
45
+ return None
46
+ except Exception as e:
47
+ logger.warning("FontOCR initialization failed: %s", e, exc_info=True)
48
+ return None
49
+
50
+ return _FONT_OCR
@@ -4,17 +4,17 @@ novel_downloader.utils.logger
4
4
  -----------------------------
5
5
 
6
6
  Provides a configurable logging setup for Python applications.
7
- Log files are rotated daily and named with the given logger name and current date.
8
7
  """
9
8
 
9
+ from __future__ import annotations
10
+
10
11
  __all__ = ["setup_logging"]
11
12
 
12
13
  import logging
13
- from datetime import datetime
14
14
  from logging.handlers import TimedRotatingFileHandler
15
15
  from pathlib import Path
16
16
 
17
- from .constants import LOGGER_DIR, LOGGER_NAME
17
+ from .constants import LOGGER_DIR, PACKAGE_NAME
18
18
 
19
19
  LOG_LEVELS: dict[str, int] = {
20
20
  "DEBUG": logging.DEBUG,
@@ -22,75 +22,99 @@ LOG_LEVELS: dict[str, int] = {
22
22
  "WARNING": logging.WARNING,
23
23
  "ERROR": logging.ERROR,
24
24
  }
25
+ _MUTE_LOGGERS: set[str] = {
26
+ "fontTools.ttLib.tables._p_o_s_t",
27
+ }
28
+
29
+
30
+ def _normalize_level(level: int | str) -> int:
31
+ if isinstance(level, int):
32
+ return level
33
+ if isinstance(level, str):
34
+ lvl = LOG_LEVELS.get(level.upper())
35
+ if isinstance(lvl, int):
36
+ return lvl
37
+ return logging.INFO
25
38
 
26
39
 
27
40
  def setup_logging(
28
- log_filename_prefix: str | None = None,
29
- log_level: str | None = None,
41
+ log_filename: str | None = None,
42
+ console_level: int | str = "INFO",
43
+ file_level: int | str = "DEBUG",
30
44
  log_dir: str | Path | None = None,
45
+ *,
46
+ console: bool = True,
47
+ file: bool = True,
48
+ backup_count: int = 7,
49
+ when: str = "midnight",
31
50
  ) -> logging.Logger:
32
51
  """
33
- Create and configure a logger for both console and rotating file output.
52
+ Create and configure a package logger with optional console and file handlers.
34
53
 
35
- :param log_filename_prefix: Prefix for the log file name.
36
- :param log_level: Minimum log level to show in console
37
- ("DEBUG", "INFO", "WARNING", "ERROR")
54
+ :param log_filename: Base log file name (without date suffix).
55
+ :param console_level: Minimum level for the console handler (string or int).
56
+ :param file_level: Minimum level for the file handler (string or int).
38
57
  :param log_dir: Directory where log files will be saved.
39
- :return: A fully configured logger instance.
58
+ :param console: Add a console handler.
59
+ :param file: Add a file handler.
60
+ :param backup_count: How many rotated files to keep.
61
+ :param when: Rotation interval for TimedRotatingFileHandler (e.g., "midnight").
62
+ :return: The configured logger.
40
63
  """
41
- ft_logger = logging.getLogger("fontTools.ttLib.tables._p_o_s_t")
42
- ft_logger.setLevel(logging.ERROR)
43
- ft_logger.propagate = False
44
-
45
- # Determine console level (default INFO)
46
- level_str: str = log_level or "INFO"
47
- console_level: int = LOG_LEVELS.get(level_str) or logging.INFO
48
-
49
- # Resolve log file path
50
- log_path = Path(log_dir) if log_dir else LOGGER_DIR
51
- log_path.mkdir(parents=True, exist_ok=True)
52
-
53
- # Resolve log file name
54
- if not log_filename_prefix:
55
- log_filename_prefix = LOGGER_NAME
56
- date_str = datetime.now().strftime("%Y-%m-%d")
57
- log_filename = log_path / f"{log_filename_prefix}_{date_str}.log"
64
+ # Tame noisy third-party loggers
65
+ for name in _MUTE_LOGGERS:
66
+ ml = logging.getLogger(name)
67
+ ml.setLevel(logging.ERROR)
68
+ ml.propagate = False
58
69
 
59
- # Create or retrieve logger
60
- logger = logging.getLogger(LOGGER_NAME)
61
- logger.setLevel(logging.DEBUG) # Capture everything, filter by handlers
62
- logger.propagate = False
70
+ logger = logging.getLogger(PACKAGE_NAME)
71
+ logger.setLevel(logging.DEBUG)
72
+ logger.propagate = False # otherwise may affected by PaddleOCR
63
73
 
64
74
  # Clear existing handlers to avoid duplicate logs
65
75
  if logger.hasHandlers():
66
76
  logger.handlers.clear()
67
77
 
68
- # File handler: rotates at midnight, keeps 7 days of logs
69
- file_handler = TimedRotatingFileHandler(
70
- filename=str(log_filename),
71
- when="midnight",
72
- interval=1,
73
- backupCount=7,
74
- encoding="utf-8",
75
- utc=False,
76
- )
77
- file_formatter = logging.Formatter(
78
- fmt="%(asctime)s [%(levelname)s] %(name)s.%(funcName)s: %(message)s",
79
- datefmt="%Y-%m-%d %H:%M:%S",
80
- )
81
- file_handler.setFormatter(file_formatter)
82
- file_handler.setLevel(logging.DEBUG)
83
- logger.addHandler(file_handler)
78
+ # File handler (rotates daily)
79
+ if file:
80
+ file_level = _normalize_level(file_level)
81
+
82
+ base_dir = Path(log_dir) if log_dir else LOGGER_DIR
83
+ base_dir.mkdir(parents=True, exist_ok=True)
84
+ base_name = log_filename or PACKAGE_NAME
85
+ log_path = base_dir / f"{base_name}.log"
86
+
87
+ fh = TimedRotatingFileHandler(
88
+ filename=log_path,
89
+ when=when,
90
+ interval=1,
91
+ backupCount=backup_count,
92
+ encoding="utf-8",
93
+ utc=False,
94
+ delay=True,
95
+ )
96
+
97
+ file_formatter = logging.Formatter(
98
+ fmt="%(asctime)s [%(levelname)s] %(name)s.%(funcName)s: %(message)s",
99
+ datefmt="%Y-%m-%d %H:%M:%S",
100
+ )
101
+ fh.setFormatter(file_formatter)
102
+ fh.setLevel(file_level)
103
+ logger.addHandler(fh)
104
+
105
+ print(f"Logging to {log_path}")
84
106
 
85
107
  # Console handler
86
- console_handler = logging.StreamHandler()
87
- console_formatter = logging.Formatter(
88
- fmt="%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
89
- )
90
- console_handler.setFormatter(console_formatter)
91
- console_handler.setLevel(console_level)
92
- logger.addHandler(console_handler)
93
-
94
- print(f"Logging to {log_path}")
108
+ if console:
109
+ console_level = _normalize_level(console_level)
110
+
111
+ console_handler = logging.StreamHandler()
112
+ console_formatter = logging.Formatter(
113
+ fmt="%(asctime)s [%(levelname)s] %(message)s",
114
+ datefmt="%H:%M:%S",
115
+ )
116
+ console_handler.setFormatter(console_formatter)
117
+ console_handler.setLevel(console_level)
118
+ logger.addHandler(console_handler)
95
119
 
96
120
  return logger
@@ -8,7 +8,6 @@ Utilities for handling HTTP requests and downloading remote resources.
8
8
 
9
9
  __all__ = ["download"]
10
10
 
11
- import logging
12
11
  from pathlib import Path
13
12
  from typing import Literal
14
13
  from urllib.parse import unquote, urlparse
@@ -19,10 +18,7 @@ from urllib3.util.retry import Retry
19
18
 
20
19
  from .constants import DEFAULT_HEADERS
21
20
  from .file_utils import sanitize_filename
22
- from .file_utils.io import _get_non_conflicting_path, write_file
23
-
24
- logger = logging.getLogger(__name__)
25
- _DEFAULT_CHUNK_SIZE = 8192 # 8KB per chunk for streaming downloads
21
+ from .file_utils.io import _unique_path, write_file
26
22
 
27
23
 
28
24
  def _normalize_url(url: str) -> str:
@@ -37,8 +33,8 @@ def _normalize_url(url: str) -> str:
37
33
 
38
34
 
39
35
  def _build_filepath(
40
- folder: Path,
41
36
  url: str,
37
+ folder: Path,
42
38
  filename: str | None,
43
39
  default_suffix: str,
44
40
  on_exist: Literal["overwrite", "skip", "rename"],
@@ -48,20 +44,18 @@ def _build_filepath(
48
44
 
49
45
  raw_name = filename or url_path.name or "unnamed"
50
46
  name = sanitize_filename(raw_name)
51
- suffix = default_suffix or url_path.suffix
52
- if suffix and not suffix.startswith("."):
53
- suffix = "." + suffix
54
47
 
55
- file_path = folder / name
56
- if not file_path.suffix and suffix:
57
- file_path = file_path.with_suffix(suffix)
48
+ if "." not in name and (url_path.suffix or default_suffix):
49
+ name += url_path.suffix or default_suffix
58
50
 
51
+ file_path = folder / name
59
52
  if on_exist == "rename":
60
- file_path = _get_non_conflicting_path(file_path)
53
+ file_path = _unique_path(file_path)
54
+
61
55
  return file_path
62
56
 
63
57
 
64
- def _make_session(
58
+ def _new_session(
65
59
  retries: int,
66
60
  backoff: float,
67
61
  headers: dict[str, str] | None,
@@ -72,7 +66,7 @@ def _make_session(
72
66
  retry = Retry(
73
67
  total=retries,
74
68
  backoff_factor=backoff,
75
- status_forcelist=[429, 500, 502, 503, 504],
69
+ status_forcelist=[413, 429, 500, 502, 503, 504],
76
70
  allowed_methods={"GET", "HEAD", "OPTIONS"},
77
71
  )
78
72
  adapter = HTTPAdapter(max_retries=retry)
@@ -90,10 +84,8 @@ def download(
90
84
  retries: int = 3,
91
85
  backoff: float = 0.5,
92
86
  headers: dict[str, str] | None = None,
93
- stream: bool = False,
94
87
  on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
95
88
  default_suffix: str = "",
96
- chunk_size: int = _DEFAULT_CHUNK_SIZE,
97
89
  ) -> Path | None:
98
90
  """
99
91
  Download a URL to disk, with retries, optional rename/skip, and cleanup on failure.
@@ -105,10 +97,8 @@ def download(
105
97
  :param retries: GET retry count.
106
98
  :param backoff: exponential backoff base.
107
99
  :param headers: optional headers.
108
- :param stream: Whether to stream the response.
109
100
  :param on_exist: if 'skip', return filepath; if 'rename', auto-rename.
110
101
  :param default_suffix: used if no suffix in URL or filename.
111
- :param chunk_size: streaming chunk size.
112
102
  :return: path to the downloaded file.
113
103
  """
114
104
  url = _normalize_url(url)
@@ -117,8 +107,8 @@ def download(
117
107
  folder.mkdir(parents=True, exist_ok=True)
118
108
 
119
109
  save_path = _build_filepath(
120
- folder,
121
110
  url,
111
+ folder,
122
112
  filename,
123
113
  default_suffix,
124
114
  on_exist,
@@ -126,34 +116,20 @@ def download(
126
116
 
127
117
  # Handle existing file
128
118
  if save_path.exists() and on_exist == "skip":
129
- logger.debug("Skipping download; file exists: %s", save_path)
130
119
  return save_path
131
120
 
132
- with _make_session(retries, backoff, headers) as session:
121
+ with _new_session(retries, backoff, headers) as session:
133
122
  try:
134
- resp = session.get(url, timeout=timeout, stream=stream)
123
+ resp = session.get(url, timeout=timeout)
135
124
  resp.raise_for_status()
136
- except Exception as e:
137
- logger.warning("[download] request failed: %s", e)
138
- return None
139
125
 
140
- # Write to disk
141
- if stream:
142
- try:
143
- with open(save_path, "wb") as f:
144
- for chunk in resp.iter_content(chunk_size=chunk_size):
145
- if chunk:
146
- f.write(chunk)
147
- return save_path
148
- except Exception as e:
149
- logger.warning("[download] write failed: %s", e)
150
- save_path.unlink(missing_ok=True)
151
- return None
152
- else:
126
+ # Write to disk
153
127
  return write_file(
154
128
  content=resp.content,
155
129
  filepath=save_path,
156
- write_mode="wb",
157
130
  on_exist=on_exist,
158
131
  )
132
+ except Exception:
133
+ return None
134
+
159
135
  return None