novel-downloader: novel_downloader-2.0.0-py3-none-any.whl → novel_downloader-2.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +3 -3
  3. novel_downloader/cli/export.py +1 -1
  4. novel_downloader/cli/ui.py +7 -7
  5. novel_downloader/config/adapter.py +191 -154
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/exporters/common/txt.py +9 -9
  8. novel_downloader/core/exporters/linovelib/txt.py +9 -9
  9. novel_downloader/core/fetchers/qidian.py +20 -35
  10. novel_downloader/core/interfaces/fetcher.py +2 -2
  11. novel_downloader/core/interfaces/parser.py +2 -2
  12. novel_downloader/core/parsers/base.py +1 -0
  13. novel_downloader/core/parsers/eightnovel.py +2 -2
  14. novel_downloader/core/parsers/esjzone.py +3 -3
  15. novel_downloader/core/parsers/qidian/main_parser.py +747 -12
  16. novel_downloader/core/parsers/qidian/utils/__init__.py +2 -21
  17. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  18. novel_downloader/core/parsers/xiguashuwu.py +6 -12
  19. novel_downloader/locales/en.json +3 -3
  20. novel_downloader/locales/zh.json +3 -3
  21. novel_downloader/utils/__init__.py +0 -2
  22. novel_downloader/utils/chapter_storage.py +2 -3
  23. novel_downloader/utils/constants.py +1 -3
  24. novel_downloader/utils/cookies.py +32 -17
  25. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  26. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  27. novel_downloader/utils/epub/__init__.py +2 -3
  28. novel_downloader/utils/epub/builder.py +6 -6
  29. novel_downloader/utils/epub/constants.py +5 -5
  30. novel_downloader/utils/epub/documents.py +7 -7
  31. novel_downloader/utils/epub/models.py +8 -8
  32. novel_downloader/utils/epub/utils.py +10 -10
  33. novel_downloader/utils/file_utils/io.py +48 -73
  34. novel_downloader/utils/file_utils/normalize.py +1 -7
  35. novel_downloader/utils/file_utils/sanitize.py +4 -11
  36. novel_downloader/utils/fontocr/__init__.py +13 -0
  37. novel_downloader/utils/{fontocr.py → fontocr/core.py} +70 -61
  38. novel_downloader/utils/fontocr/loader.py +50 -0
  39. novel_downloader/utils/logger.py +80 -56
  40. novel_downloader/utils/network.py +16 -40
  41. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  42. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  43. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  44. novel_downloader/web/main.py +1 -1
  45. novel_downloader/web/pages/search.py +3 -3
  46. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/METADATA +2 -1
  47. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/RECORD +51 -55
  48. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  49. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  50. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  51. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  52. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  53. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  54. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  55. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +0 -0
  56. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  57. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -6,25 +6,6 @@ novel_downloader.core.parsers.qidian.utils
6
6
  Utility functions and helpers for parsing and decrypting Qidian novel pages
7
7
  """
8
8
 
9
- __all__ = [
10
- "find_ssr_page_context",
11
- "extract_chapter_info",
12
- "is_restricted_page",
13
- "vip_status",
14
- "can_view_chapter",
15
- "is_encrypted",
16
- "is_duplicated",
17
- "QidianNodeDecryptor",
18
- "get_decryptor",
19
- ]
9
+ __all__ = ["get_decryptor"]
20
10
 
21
- from .helpers import (
22
- can_view_chapter,
23
- extract_chapter_info,
24
- find_ssr_page_context,
25
- is_duplicated,
26
- is_encrypted,
27
- is_restricted_page,
28
- vip_status,
29
- )
30
- from .node_decryptor import QidianNodeDecryptor, get_decryptor
11
+ from .node_decryptor import get_decryptor
@@ -36,10 +36,10 @@ class QidianNodeDecryptor:
36
36
  3. Download the remote Fock module JS if not already present.
37
37
 
38
38
  Calling `decrypt()` will:
39
- - Write a temp JSON input file with [ciphertext, chapter_id, fkp, fuid].
40
- - Spawn `node qidian_decrypt_node.js <in> <out>`.
41
- - Read and return the decrypted text.
42
- - Clean up the temp files.
39
+ * Write a temp JSON input file with [ciphertext, chapter_id, fkp, fuid].
40
+ * Spawn `node qidian_decrypt_node.js <in> <out>`.
41
+ * Read and return the decrypted text.
42
+ * Clean up the temp files.
43
43
  """
44
44
 
45
45
  QIDIAN_FOCK_JS_URL: str = (
@@ -29,6 +29,7 @@ from novel_downloader.utils.constants import (
29
29
  XIGUASHUWU_FONT_MAP_PATH,
30
30
  )
31
31
  from novel_downloader.utils.crypto_utils.aes_util import aes_cbc_decrypt
32
+ from novel_downloader.utils.fontocr import get_font_ocr
32
33
 
33
34
  logger = logging.getLogger(__name__)
34
35
 
@@ -292,26 +293,19 @@ class XiguashuwuParser(BaseParser):
292
293
  :return: The recognized character (top-1) if OCR succeeds, otherwise None.
293
294
  """
294
295
  try:
295
- import io
296
-
297
- import numpy as np
298
- from PIL import Image
299
-
300
- from novel_downloader.utils.fontocr import get_font_ocr
296
+ ocr = get_font_ocr()
297
+ if not ocr:
298
+ return None
301
299
 
302
300
  resp = requests.get(url, headers=DEFAULT_USER_HEADERS, timeout=15)
303
301
  resp.raise_for_status()
304
302
 
305
- im = Image.open(io.BytesIO(resp.content)).convert("RGB")
306
- img_np = np.asarray(im)
303
+ img_np = ocr.load_image_array_from_bytes(resp.content)
307
304
 
308
- ocr = get_font_ocr(batch_size=1)
309
- char, score = ocr.predict([img_np], top_k=1)[0][0]
305
+ char, score = ocr.predict([img_np])[0]
310
306
 
311
307
  return char if score >= cls._CONF_THRESHOLD else None
312
308
 
313
- except ImportError:
314
- logger.warning("[Parser] FontOCR not available, font decoding will skip")
315
309
  except Exception as e:
316
310
  logger.warning("[Parser] Failed to ocr glyph image %s: %s", url, e)
317
311
  return None
@@ -39,9 +39,9 @@
39
39
  "login_description": "Description",
40
40
  "login_hint": "Hint",
41
41
  "login_use_config": "Using value from config.",
42
- "login_enter_password": "Enter password: ",
43
- "login_enter_cookie": "Enter cookies: ",
44
- "login_enter_value": "Enter value: ",
42
+ "login_enter_password": "Enter password",
43
+ "login_enter_cookie": "Enter cookies",
44
+ "login_enter_value": "Enter value",
45
45
  "login_required_field": "This field is required. Please enter a value.",
46
46
 
47
47
  "clean_logs": "Clean log directory",
@@ -39,9 +39,9 @@
39
39
  "login_description": "说明",
40
40
  "login_hint": "提示",
41
41
  "login_use_config": "使用配置中的默认值",
42
- "login_enter_password": "请输入密码: ",
43
- "login_enter_cookie": "请输入 Cookie: ",
44
- "login_enter_value": "请输入值: ",
42
+ "login_enter_password": "请输入密码",
43
+ "login_enter_cookie": "请输入 Cookie",
44
+ "login_enter_value": "请输入值",
45
45
  "login_required_field": "该字段是必填项, 请重新输入",
46
46
 
47
47
  "clean_failed": "删除失败: {path}",
@@ -11,7 +11,6 @@ __all__ = [
11
11
  "TextCleaner",
12
12
  "parse_cookies",
13
13
  "get_cookie_value",
14
- "rc4_crypt",
15
14
  "sanitize_filename",
16
15
  "write_file",
17
16
  "download",
@@ -29,7 +28,6 @@ from .cookies import (
29
28
  get_cookie_value,
30
29
  parse_cookies,
31
30
  )
32
- from .crypto_utils import rc4_crypt
33
31
  from .file_utils import (
34
32
  sanitize_filename,
35
33
  write_file,
@@ -13,7 +13,7 @@ import json
13
13
  import sqlite3
14
14
  import types
15
15
  from pathlib import Path
16
- from typing import Any, Self, cast
16
+ from typing import Any, Self
17
17
 
18
18
  from novel_downloader.models import ChapterDict
19
19
 
@@ -313,8 +313,7 @@ class ChapterStorage:
313
313
  @staticmethod
314
314
  def _load_dict(data: str) -> dict[str, Any]:
315
315
  try:
316
- parsed = json.loads(data)
317
- return cast(dict[str, Any], parsed)
316
+ return json.loads(data) or {}
318
317
  except Exception:
319
318
  return {}
320
319
 
@@ -16,14 +16,12 @@ from platformdirs import user_config_path
16
16
  # -----------------------------------------------------------------------------
17
17
  PACKAGE_NAME = "novel_downloader" # Python package name
18
18
  APP_NAME = "NovelDownloader" # Display name
19
- APP_DIR_NAME = PACKAGE_NAME # Directory name for platformdirs
20
- LOGGER_NAME = PACKAGE_NAME # Root logger name
21
19
 
22
20
  # -----------------------------------------------------------------------------
23
21
  # Base directories
24
22
  # -----------------------------------------------------------------------------
25
23
  # Base config directory (e.g. ~/AppData/Local/novel_downloader/)
26
- BASE_CONFIG_DIR = Path(user_config_path(APP_DIR_NAME, appauthor=False))
24
+ BASE_CONFIG_DIR = user_config_path(PACKAGE_NAME, appauthor=False)
27
25
  WORK_DIR = Path.cwd()
28
26
  PACKAGE_ROOT: Path = Path(__file__).parent.parent
29
27
  LOCALES_DIR: Path = PACKAGE_ROOT / "locales"
@@ -8,10 +8,11 @@ Utility for normalizing cookie input from user configuration.
8
8
 
9
9
  __all__ = ["parse_cookies", "get_cookie_value"]
10
10
 
11
+ import functools
11
12
  import json
12
13
  from collections.abc import Mapping
13
- from http.cookies import SimpleCookie
14
14
  from pathlib import Path
15
+ from typing import Any
15
16
 
16
17
 
17
18
  def parse_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
@@ -27,10 +28,16 @@ def parse_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
27
28
  :raises TypeError: If the input is neither string nor dict-like
28
29
  """
29
30
  if isinstance(cookies, str):
30
- filtered = "; ".join(pair for pair in cookies.split(";") if "=" in pair)
31
- parsed = SimpleCookie()
32
- parsed.load(filtered)
33
- return {k: v.value for k, v in parsed.items()}
31
+ result: dict[str, str] = {}
32
+ for part in cookies.split(";"):
33
+ if "=" not in part:
34
+ continue
35
+ key, value = part.split("=", 1)
36
+ key, value = key.strip(), value.strip()
37
+ if not key:
38
+ continue
39
+ result[key] = value
40
+ return result
34
41
  elif isinstance(cookies, Mapping):
35
42
  return {str(k).strip(): str(v).strip() for k, v in cookies.items()}
36
43
  raise TypeError("Unsupported cookie format: must be str or dict-like")
@@ -38,17 +45,25 @@ def parse_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
38
45
 
39
46
  def get_cookie_value(state_files: list[Path], key: str) -> str:
40
47
  for state_file in state_files:
41
- try:
42
- with state_file.open("r", encoding="utf-8") as f:
43
- data = json.load(f)
44
- except Exception:
45
- continue
46
-
48
+ mtime = state_file.stat().st_mtime
49
+ data = load_state_file(state_file, mtime)
47
50
  cookies = data.get("cookies", [])
48
- for cookie in cookies:
49
- if cookie.get("name") != key:
50
- continue
51
- value = cookie.get("value")
52
- if isinstance(value, str):
53
- return value
51
+ value = next(
52
+ (
53
+ c.get("value")
54
+ for c in cookies
55
+ if c.get("name") == key and isinstance(c.get("value"), str)
56
+ ),
57
+ None,
58
+ )
59
+ if isinstance(value, str):
60
+ return value
54
61
  return ""
62
+
63
+
64
+ @functools.cache
65
+ def load_state_file(state_file: Path, mtime: float = 0.0) -> dict[str, Any]:
66
+ try:
67
+ return json.loads(state_file.read_text(encoding="utf-8")) or {}
68
+ except (OSError, json.JSONDecodeError):
69
+ return {}
@@ -5,9 +5,3 @@ novel_downloader.utils.crypto_utils
5
5
 
6
6
  Generic cryptographic utilities
7
7
  """
8
-
9
- __all__ = [
10
- "rc4_crypt",
11
- ]
12
-
13
- from .rc4 import rc4_crypt
@@ -3,62 +3,52 @@
3
3
  novel_downloader.utils.crypto_utils.rc4
4
4
  ---------------------------------------
5
5
 
6
- RC4 stream cipher for simple text encryption and decryption.
6
+ Minimal RC4 stream cipher implementation.
7
7
  """
8
8
 
9
- import base64
10
9
 
11
-
12
- def rc4_crypt(
13
- key: str,
14
- data: str,
15
- *,
16
- mode: str = "encrypt",
17
- encoding: str = "utf-8",
18
- ) -> str:
10
+ def rc4_init(key: bytes) -> list[int]:
19
11
  """
20
- Encrypt or decrypt data using RC4 and Base64.
21
-
22
- :param key: RC4 key (will be encoded using the specified encoding).
23
- :param data: Plain-text (for 'encrypt') or Base64 cipher-text (for 'decrypt').
24
- :param mode: Operation mode, either 'encrypt' or 'decrypt'. Defaults to 'encrypt'.
25
- :param encoding: Character encoding for key and returned string. Defaults 'utf-8'.
26
-
27
- :return: Base64 cipher-text (for encryption) or decoded plain-text (for decryption).
28
-
29
- :raises ValueError: If mode is not 'encrypt' or 'decrypt'.
12
+ Key-Scheduling Algorithm (KSA)
30
13
  """
14
+ S = list(range(256))
15
+ j = 0
16
+ klen = len(key)
17
+ for i in range(256):
18
+ j = (j + S[i] + key[i % klen]) & 0xFF
19
+ S[i], S[j] = S[j], S[i]
20
+ return S
31
21
 
32
- def _rc4(key_bytes: bytes, data_bytes: bytes) -> bytes:
33
- # Key-Scheduling Algorithm (KSA)
34
- S = list(range(256))
35
- j = 0
36
- for i in range(256):
37
- j = (j + S[i] + key_bytes[i % len(key_bytes)]) % 256
38
- S[i], S[j] = S[j], S[i]
39
-
40
- # Pseudo-Random Generation Algorithm (PRGA)
41
- i = j = 0
42
- out: list[int] = []
43
- for char in data_bytes:
44
- i = (i + 1) % 256
45
- j = (j + S[i]) % 256
46
- S[i], S[j] = S[j], S[i]
47
- K = S[(S[i] + S[j]) % 256]
48
- out.append(char ^ K)
49
-
50
- return bytes(out)
51
22
 
52
- key_bytes = key.encode(encoding)
53
-
54
- if mode == "encrypt":
55
- plain_bytes = data.encode(encoding)
56
- cipher_bytes = _rc4(key_bytes, plain_bytes)
57
- return base64.b64encode(cipher_bytes).decode(encoding)
23
+ def rc4_stream(S_init: list[int], data: bytes) -> bytes:
24
+ """
25
+ Pseudo-Random Generation Algorithm (PRGA)
26
+ """
27
+ # make a copy of S since it mutates during PRGA
28
+ S = S_init.copy()
29
+ i = 0
30
+ j = 0
31
+ out = bytearray(len(data))
32
+ for idx, ch in enumerate(data):
33
+ i = (i + 1) & 0xFF
34
+ j = (j + S[i]) & 0xFF
35
+ S[i], S[j] = S[j], S[i]
36
+ K = S[(S[i] + S[j]) & 0xFF]
37
+ out[idx] = ch ^ K
38
+
39
+ return bytes(out)
40
+
41
+
42
+ def rc4_cipher(key: bytes, data: bytes) -> bytes:
43
+ """
44
+ RC4 stream cipher.
58
45
 
59
- if mode == "decrypt":
60
- cipher_bytes = base64.b64decode(data)
61
- plain_bytes = _rc4(key_bytes, cipher_bytes)
62
- return plain_bytes.decode(encoding, errors="replace")
46
+ It performs the standard Key-Scheduling Algorithm (KSA) and
47
+ Pseudo-Random Generation Algorithm (PRGA) to produce the RC4 keystream.
63
48
 
64
- raise ValueError("Mode must be 'encrypt' or 'decrypt'.")
49
+ :param key: RC4 key as bytes (must not be empty)
50
+ :param data: plaintext or ciphertext as bytes
51
+ :return: XORed bytes (encrypt/decrypt are identical)
52
+ """
53
+ S = rc4_init(key)
54
+ return rc4_stream(S, data)
@@ -6,9 +6,8 @@ novel_downloader.utils.epub
6
6
  Top-level package for EPUB export utilities.
7
7
 
8
8
  Key components:
9
-
10
- - EpubBuilder : orchestrates metadata, manifest, spine, navigation, and resources
11
- - Chapter, Volume : represent and render content sections and volume intros
9
+ * EpubBuilder : orchestrates metadata, manifest, spine, navigation, and resources
10
+ * Chapter, Volume : represent and render content sections and volume intros
12
11
 
13
12
  Usage example:
14
13
 
@@ -4,14 +4,14 @@ novel_downloader.utils.epub.builder
4
4
  -----------------------------------
5
5
 
6
6
  Orchestrates the end-to-end EPUB build process by:
7
- - Managing metadata (title, author, description, language, etc.)
8
- - Collecting and deduplicating resources (chapters, images, stylesheets)
9
- - Registering everything in the OPF manifest and spine
10
- - Generating nav.xhtml, toc.ncx, content.opf, and the zipped .epub file
7
+ * Managing metadata (title, author, description, language, etc.)
8
+ * Collecting and deduplicating resources (chapters, images, stylesheets)
9
+ * Registering everything in the OPF manifest and spine
10
+ * Generating nav.xhtml, toc.ncx, content.opf, and the zipped .epub file
11
11
 
12
12
  Provides:
13
- - methods to add chapters, volumes, images, and styles
14
- - a clean `export()` entry point that writes the final EPUB archive
13
+ * methods to add chapters, volumes, images, and styles
14
+ * a clean `export()` entry point that writes the final EPUB archive
15
15
  """
16
16
 
17
17
  import zipfile
@@ -4,11 +4,11 @@ novel_downloader.utils.epub.constants
4
4
  -------------------------------------
5
5
 
6
6
  EPUB-specific constants used by the builder, including:
7
- - Directory names for OEBPS structure
8
- - XML namespace URIs
9
- - Package attributes and document-type declarations
10
- - Media type mappings for images
11
- - Template strings for container.xml and cover image HTML
7
+ * Directory names for OEBPS structure
8
+ * XML namespace URIs
9
+ * Package attributes and document-type declarations
10
+ * Media type mappings for images
11
+ * Template strings for container.xml and cover image HTML
12
12
  """
13
13
 
14
14
  ROOT_PATH = "OEBPS"
@@ -4,9 +4,9 @@ novel_downloader.utils.epub.documents
4
4
  -------------------------------------
5
5
 
6
6
  Defines the classes that render EPUB navigation and packaging documents:
7
- - NavDocument: builds the XHTML nav.xhtml (EPUB 3)
8
- - NCXDocument: builds the NCX XML navigation map (EPUB 2)
9
- - OpfDocument: builds the content.opf package document
7
+ * NavDocument: builds the XHTML nav.xhtml (EPUB 3)
8
+ * NCXDocument: builds the NCX XML navigation map (EPUB 2)
9
+ * OpfDocument: builds the content.opf package document
10
10
  """
11
11
 
12
12
  from collections.abc import Sequence
@@ -234,10 +234,10 @@ class OpfDocument(EpubResource):
234
234
  Generate the content.opf XML, which defines metadata, manifest, and spine.
235
235
 
236
236
  This function outputs a complete OPF package document that includes:
237
- - <metadata>: title, author, language, identifiers, etc.
238
- - <manifest>: all resource entries
239
- - <spine>: the reading order of the content
240
- - <guide>: optional references like cover page
237
+ * <metadata>: title, author, language, identifiers, etc.
238
+ * <manifest>: all resource entries
239
+ * <spine>: the reading order of the content
240
+ * <guide>: optional references like cover page
241
241
 
242
242
  :return: A string containing the full OPF XML content.
243
243
  """
@@ -4,14 +4,14 @@ novel_downloader.utils.epub.models
4
4
  ----------------------------------
5
5
 
6
6
  Defines the core EPUB data models and resource classes used by the builder:
7
- - Typed entries for table of contents (ChapterEntry, VolumeEntry)
8
- - Manifest and spine record types (ManifestEntry, SpineEntry)
9
- - Hierarchical NavPoint for NCX navigation
10
- - Base resource class (EpubResource) and specializations:
11
- - StyleSheet
12
- - ImageResource
13
- - Chapter (with XHTML serialization)
14
- - Volume container for grouping chapters with optional intro and cover
7
+ * Typed entries for table of contents (ChapterEntry, VolumeEntry)
8
+ * Manifest and spine record types (ManifestEntry, SpineEntry)
9
+ * Hierarchical NavPoint for NCX navigation
10
+ * Base resource class (EpubResource) and specializations:
11
+ * StyleSheet
12
+ * ImageResource
13
+ * Chapter (with XHTML serialization)
14
+ * Volume container for grouping chapters with optional intro and cover
15
15
  """
16
16
 
17
17
  from __future__ import annotations
@@ -4,9 +4,9 @@ novel_downloader.utils.epub.utils
4
4
  ---------------------------------
5
5
 
6
6
  Pure utility functions for EPUB assembly, including:
7
- - Computing file hashes
8
- - Generating META-INF/container.xml
9
- - Constructing HTML snippets for the book intro and volume intro
7
+ * Computing file hashes
8
+ * Generating META-INF/container.xml
9
+ * Constructing HTML snippets for the book intro and volume intro
10
10
  """
11
11
 
12
12
  import hashlib
@@ -59,9 +59,9 @@ def build_book_intro(
59
59
  Build the HTML snippet for the overall book introduction.
60
60
 
61
61
  This includes:
62
- - A main heading ("Book Introduction")
63
- - A list of metadata items (title, author, categories, word count, status)
64
- - A "Summary" subheading and one or more paragraphs of summary text
62
+ * A main heading ("Book Introduction")
63
+ * A list of metadata items (title, author, categories, word count, status)
64
+ * A "Summary" subheading and one or more paragraphs of summary text
65
65
 
66
66
  :return: A HTML string for inclusion in `intro.xhtml`
67
67
  """
@@ -112,10 +112,10 @@ def build_volume_intro(
112
112
  Build the HTML snippet for a single-volume introduction.
113
113
 
114
114
  This includes:
115
- - A decorative border image (top and bottom)
116
- - A primary heading (volume main title)
117
- - An optional secondary line (subtitle)
118
- - One or more paragraphs of intro text
115
+ * A decorative border image (top and bottom)
116
+ * A primary heading (volume main title)
117
+ * An optional secondary line (subtitle)
118
+ * One or more paragraphs of intro text
119
119
 
120
120
  :param volume_title: e.g. "Volume 1 - The Beginning"
121
121
  :param volume_intro_text: multiline intro text for this volume
@@ -8,99 +8,74 @@ File I/O utilities for reading and writing data.
8
8
 
9
9
  __all__ = ["write_file"]
10
10
 
11
- import json
12
- import logging
13
11
  import tempfile
14
12
  from pathlib import Path
15
- from typing import Any, Literal
13
+ from typing import Literal
16
14
 
17
15
  from .sanitize import sanitize_filename
18
16
 
19
- logger = logging.getLogger(__name__)
20
17
 
21
- _JSON_INDENT_THRESHOLD = 50 * 1024 # bytes
22
-
23
-
24
- def _get_non_conflicting_path(path: Path) -> Path:
18
+ def _unique_path(path: Path, max_tries: int = 100) -> Path:
25
19
  """
26
- If the path exists, generate a new one by appending _1, _2, etc.
20
+ Return a unique file path by appending _1, _2, ... if needed.
21
+
22
+ Falls back to a UUID suffix if all attempts fail.
27
23
  """
28
- counter = 1
29
- new_path = path
30
- while new_path.exists():
31
- stem = path.stem
32
- suffix = path.suffix
33
- new_path = path.with_name(f"{stem}_{counter}{suffix}")
34
- counter += 1
35
- return new_path
24
+ if not path.exists():
25
+ return path
26
+
27
+ stem = path.stem
28
+ suffix = path.suffix
29
+
30
+ for counter in range(1, max_tries + 1):
31
+ candidate = path.with_name(f"{stem}_{counter}{suffix}")
32
+ if not candidate.exists():
33
+ return candidate
34
+
35
+ # fallback: append a random/unique suffix
36
+ import uuid
37
+
38
+ return path.with_name(f"{stem}_{uuid.uuid4().hex}{suffix}")
36
39
 
37
40
 
38
41
  def write_file(
39
- content: str | bytes | dict[Any, Any] | list[Any] | Any,
42
+ content: str | bytes,
40
43
  filepath: str | Path,
41
- write_mode: str = "w",
42
44
  *,
43
45
  on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
44
- dump_json: bool = False,
45
46
  encoding: str = "utf-8",
46
- ) -> Path | None:
47
+ ) -> Path:
47
48
  """
48
- Write content to a file safely with optional atomic behavior
49
- and JSON serialization.
50
-
51
- :param content: The content to write; can be text, bytes, or a
52
- JSON-serializable object.
53
- :param filepath: Destination path (str or Path).
54
- :param mode: File mode ('w', 'wb'). Auto-determined if None.
55
- :param on_exist: Behavior if file exists: 'overwrite', 'skip',
56
- or 'rename'.
57
- :param dump_json: If True, serialize content as JSON.
49
+ Write content to a file safely with atomic replacement.
50
+
51
+ :param content: The content to write; can be text or bytes.
52
+ :param filepath: Destination path.
53
+ :param on_exist: Behavior if file exists.
58
54
  :param encoding: Text encoding for writing.
59
- :return: Path if writing succeeds, None otherwise.
55
+ :return: The final path where the content was written.
56
+ :raise: Any I/O error such as PermissionError or OSError
60
57
  """
61
58
  path = Path(filepath)
62
59
  path = path.with_name(sanitize_filename(path.name))
63
60
  path.parent.mkdir(parents=True, exist_ok=True)
64
61
 
65
62
  if path.exists():
66
- if on_exist == "skip":
67
- logger.debug("[file] '%s' exists, skipping", path)
68
- return path
69
- if on_exist == "rename":
70
- path = _get_non_conflicting_path(path)
71
- logger.debug("[file] Renaming target to avoid conflict: %s", path)
72
- else:
73
- logger.debug("[file] '%s' exists, will overwrite", path)
74
-
75
- # Prepare content and write mode
76
- content_to_write: str | bytes
77
- if dump_json:
78
- # Serialize original object to JSON string
79
- json_str = json.dumps(content, ensure_ascii=False, indent=2)
80
- if len(json_str.encode(encoding)) > _JSON_INDENT_THRESHOLD:
81
- json_str = json.dumps(content, ensure_ascii=False, separators=(",", ":"))
82
- content_to_write = json_str
83
- write_mode = "w"
84
- else:
85
- if isinstance(content, (str | bytes)):
86
- content_to_write = content
87
- else:
88
- raise TypeError("Non-JSON content must be str or bytes.")
89
- write_mode = "wb" if isinstance(content, bytes) else "w"
90
-
91
- try:
92
- with tempfile.NamedTemporaryFile(
93
- mode=write_mode,
94
- encoding=None if "b" in write_mode else encoding,
95
- newline=None if "b" in write_mode else "\n",
96
- delete=False,
97
- dir=path.parent,
98
- ) as tmp:
99
- tmp.write(content_to_write)
100
- tmp_path = Path(tmp.name)
101
- tmp_path.replace(path)
102
- logger.debug("[file] '%s' written successfully", path)
103
- return path
104
- except Exception as exc:
105
- logger.warning("[file] Error writing %r: %s", path, exc)
106
- return None
63
+ match on_exist:
64
+ case "skip":
65
+ return path
66
+ case "rename":
67
+ path = _unique_path(path)
68
+
69
+ write_mode = "wb" if isinstance(content, bytes) else "w"
70
+
71
+ with tempfile.NamedTemporaryFile(
72
+ mode=write_mode,
73
+ encoding=None if "b" in write_mode else encoding,
74
+ newline=None if "b" in write_mode else "\n",
75
+ delete=False,
76
+ dir=path.parent,
77
+ ) as tmp:
78
+ tmp.write(content)
79
+ tmp_path = Path(tmp.name)
80
+ tmp_path.replace(path)
81
+ return path