novel-downloader 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +3 -3
- novel_downloader/cli/export.py +1 -1
- novel_downloader/cli/ui.py +7 -7
- novel_downloader/config/adapter.py +191 -154
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/exporters/common/txt.py +9 -9
- novel_downloader/core/exporters/linovelib/txt.py +9 -9
- novel_downloader/core/fetchers/qidian.py +20 -35
- novel_downloader/core/interfaces/fetcher.py +2 -2
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/base.py +1 -0
- novel_downloader/core/parsers/eightnovel.py +2 -2
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/qidian/main_parser.py +747 -12
- novel_downloader/core/parsers/qidian/utils/__init__.py +2 -21
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
- novel_downloader/core/parsers/xiguashuwu.py +6 -12
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +1 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +5 -5
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +70 -61
- novel_downloader/utils/fontocr/loader.py +50 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/search.py +3 -3
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/METADATA +2 -1
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/RECORD +51 -55
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -6,25 +6,6 @@ novel_downloader.core.parsers.qidian.utils
|
|
6
6
|
Utility functions and helpers for parsing and decrypting Qidian novel pages
|
7
7
|
"""
|
8
8
|
|
9
|
-
__all__ = [
|
10
|
-
"find_ssr_page_context",
|
11
|
-
"extract_chapter_info",
|
12
|
-
"is_restricted_page",
|
13
|
-
"vip_status",
|
14
|
-
"can_view_chapter",
|
15
|
-
"is_encrypted",
|
16
|
-
"is_duplicated",
|
17
|
-
"QidianNodeDecryptor",
|
18
|
-
"get_decryptor",
|
19
|
-
]
|
9
|
+
__all__ = ["get_decryptor"]
|
20
10
|
|
21
|
-
from .
|
22
|
-
can_view_chapter,
|
23
|
-
extract_chapter_info,
|
24
|
-
find_ssr_page_context,
|
25
|
-
is_duplicated,
|
26
|
-
is_encrypted,
|
27
|
-
is_restricted_page,
|
28
|
-
vip_status,
|
29
|
-
)
|
30
|
-
from .node_decryptor import QidianNodeDecryptor, get_decryptor
|
11
|
+
from .node_decryptor import get_decryptor
|
@@ -36,10 +36,10 @@ class QidianNodeDecryptor:
|
|
36
36
|
3. Download the remote Fock module JS if not already present.
|
37
37
|
|
38
38
|
Calling `decrypt()` will:
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
* Write a temp JSON input file with [ciphertext, chapter_id, fkp, fuid].
|
40
|
+
* Spawn `node qidian_decrypt_node.js <in> <out>`.
|
41
|
+
* Read and return the decrypted text.
|
42
|
+
* Clean up the temp files.
|
43
43
|
"""
|
44
44
|
|
45
45
|
QIDIAN_FOCK_JS_URL: str = (
|
@@ -29,6 +29,7 @@ from novel_downloader.utils.constants import (
|
|
29
29
|
XIGUASHUWU_FONT_MAP_PATH,
|
30
30
|
)
|
31
31
|
from novel_downloader.utils.crypto_utils.aes_util import aes_cbc_decrypt
|
32
|
+
from novel_downloader.utils.fontocr import get_font_ocr
|
32
33
|
|
33
34
|
logger = logging.getLogger(__name__)
|
34
35
|
|
@@ -292,26 +293,19 @@ class XiguashuwuParser(BaseParser):
|
|
292
293
|
:return: The recognized character (top-1) if OCR succeeds, otherwise None.
|
293
294
|
"""
|
294
295
|
try:
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
from PIL import Image
|
299
|
-
|
300
|
-
from novel_downloader.utils.fontocr import get_font_ocr
|
296
|
+
ocr = get_font_ocr()
|
297
|
+
if not ocr:
|
298
|
+
return None
|
301
299
|
|
302
300
|
resp = requests.get(url, headers=DEFAULT_USER_HEADERS, timeout=15)
|
303
301
|
resp.raise_for_status()
|
304
302
|
|
305
|
-
|
306
|
-
img_np = np.asarray(im)
|
303
|
+
img_np = ocr.load_image_array_from_bytes(resp.content)
|
307
304
|
|
308
|
-
|
309
|
-
char, score = ocr.predict([img_np], top_k=1)[0][0]
|
305
|
+
char, score = ocr.predict([img_np])[0]
|
310
306
|
|
311
307
|
return char if score >= cls._CONF_THRESHOLD else None
|
312
308
|
|
313
|
-
except ImportError:
|
314
|
-
logger.warning("[Parser] FontOCR not available, font decoding will skip")
|
315
309
|
except Exception as e:
|
316
310
|
logger.warning("[Parser] Failed to ocr glyph image %s: %s", url, e)
|
317
311
|
return None
|
novel_downloader/locales/en.json
CHANGED
@@ -39,9 +39,9 @@
|
|
39
39
|
"login_description": "Description",
|
40
40
|
"login_hint": "Hint",
|
41
41
|
"login_use_config": "Using value from config.",
|
42
|
-
"login_enter_password": "Enter password
|
43
|
-
"login_enter_cookie": "Enter cookies
|
44
|
-
"login_enter_value": "Enter value
|
42
|
+
"login_enter_password": "Enter password",
|
43
|
+
"login_enter_cookie": "Enter cookies",
|
44
|
+
"login_enter_value": "Enter value",
|
45
45
|
"login_required_field": "This field is required. Please enter a value.",
|
46
46
|
|
47
47
|
"clean_logs": "Clean log directory",
|
novel_downloader/locales/zh.json
CHANGED
@@ -39,9 +39,9 @@
|
|
39
39
|
"login_description": "说明",
|
40
40
|
"login_hint": "提示",
|
41
41
|
"login_use_config": "使用配置中的默认值",
|
42
|
-
"login_enter_password": "
|
43
|
-
"login_enter_cookie": "请输入 Cookie
|
44
|
-
"login_enter_value": "
|
42
|
+
"login_enter_password": "请输入密码",
|
43
|
+
"login_enter_cookie": "请输入 Cookie",
|
44
|
+
"login_enter_value": "请输入值",
|
45
45
|
"login_required_field": "该字段是必填项, 请重新输入",
|
46
46
|
|
47
47
|
"clean_failed": "删除失败: {path}",
|
@@ -11,7 +11,6 @@ __all__ = [
|
|
11
11
|
"TextCleaner",
|
12
12
|
"parse_cookies",
|
13
13
|
"get_cookie_value",
|
14
|
-
"rc4_crypt",
|
15
14
|
"sanitize_filename",
|
16
15
|
"write_file",
|
17
16
|
"download",
|
@@ -29,7 +28,6 @@ from .cookies import (
|
|
29
28
|
get_cookie_value,
|
30
29
|
parse_cookies,
|
31
30
|
)
|
32
|
-
from .crypto_utils import rc4_crypt
|
33
31
|
from .file_utils import (
|
34
32
|
sanitize_filename,
|
35
33
|
write_file,
|
@@ -13,7 +13,7 @@ import json
|
|
13
13
|
import sqlite3
|
14
14
|
import types
|
15
15
|
from pathlib import Path
|
16
|
-
from typing import Any, Self
|
16
|
+
from typing import Any, Self
|
17
17
|
|
18
18
|
from novel_downloader.models import ChapterDict
|
19
19
|
|
@@ -313,8 +313,7 @@ class ChapterStorage:
|
|
313
313
|
@staticmethod
|
314
314
|
def _load_dict(data: str) -> dict[str, Any]:
|
315
315
|
try:
|
316
|
-
|
317
|
-
return cast(dict[str, Any], parsed)
|
316
|
+
return json.loads(data) or {}
|
318
317
|
except Exception:
|
319
318
|
return {}
|
320
319
|
|
@@ -16,14 +16,12 @@ from platformdirs import user_config_path
|
|
16
16
|
# -----------------------------------------------------------------------------
|
17
17
|
PACKAGE_NAME = "novel_downloader" # Python package name
|
18
18
|
APP_NAME = "NovelDownloader" # Display name
|
19
|
-
APP_DIR_NAME = PACKAGE_NAME # Directory name for platformdirs
|
20
|
-
LOGGER_NAME = PACKAGE_NAME # Root logger name
|
21
19
|
|
22
20
|
# -----------------------------------------------------------------------------
|
23
21
|
# Base directories
|
24
22
|
# -----------------------------------------------------------------------------
|
25
23
|
# Base config directory (e.g. ~/AppData/Local/novel_downloader/)
|
26
|
-
BASE_CONFIG_DIR =
|
24
|
+
BASE_CONFIG_DIR = user_config_path(PACKAGE_NAME, appauthor=False)
|
27
25
|
WORK_DIR = Path.cwd()
|
28
26
|
PACKAGE_ROOT: Path = Path(__file__).parent.parent
|
29
27
|
LOCALES_DIR: Path = PACKAGE_ROOT / "locales"
|
@@ -8,10 +8,11 @@ Utility for normalizing cookie input from user configuration.
|
|
8
8
|
|
9
9
|
__all__ = ["parse_cookies", "get_cookie_value"]
|
10
10
|
|
11
|
+
import functools
|
11
12
|
import json
|
12
13
|
from collections.abc import Mapping
|
13
|
-
from http.cookies import SimpleCookie
|
14
14
|
from pathlib import Path
|
15
|
+
from typing import Any
|
15
16
|
|
16
17
|
|
17
18
|
def parse_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
|
@@ -27,10 +28,16 @@ def parse_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
|
|
27
28
|
:raises TypeError: If the input is neither string nor dict-like
|
28
29
|
"""
|
29
30
|
if isinstance(cookies, str):
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
31
|
+
result: dict[str, str] = {}
|
32
|
+
for part in cookies.split(";"):
|
33
|
+
if "=" not in part:
|
34
|
+
continue
|
35
|
+
key, value = part.split("=", 1)
|
36
|
+
key, value = key.strip(), value.strip()
|
37
|
+
if not key:
|
38
|
+
continue
|
39
|
+
result[key] = value
|
40
|
+
return result
|
34
41
|
elif isinstance(cookies, Mapping):
|
35
42
|
return {str(k).strip(): str(v).strip() for k, v in cookies.items()}
|
36
43
|
raise TypeError("Unsupported cookie format: must be str or dict-like")
|
@@ -38,17 +45,25 @@ def parse_cookies(cookies: str | Mapping[str, str]) -> dict[str, str]:
|
|
38
45
|
|
39
46
|
def get_cookie_value(state_files: list[Path], key: str) -> str:
|
40
47
|
for state_file in state_files:
|
41
|
-
|
42
|
-
|
43
|
-
data = json.load(f)
|
44
|
-
except Exception:
|
45
|
-
continue
|
46
|
-
|
48
|
+
mtime = state_file.stat().st_mtime
|
49
|
+
data = load_state_file(state_file, mtime)
|
47
50
|
cookies = data.get("cookies", [])
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
51
|
+
value = next(
|
52
|
+
(
|
53
|
+
c.get("value")
|
54
|
+
for c in cookies
|
55
|
+
if c.get("name") == key and isinstance(c.get("value"), str)
|
56
|
+
),
|
57
|
+
None,
|
58
|
+
)
|
59
|
+
if isinstance(value, str):
|
60
|
+
return value
|
54
61
|
return ""
|
62
|
+
|
63
|
+
|
64
|
+
@functools.cache
|
65
|
+
def load_state_file(state_file: Path, mtime: float = 0.0) -> dict[str, Any]:
|
66
|
+
try:
|
67
|
+
return json.loads(state_file.read_text(encoding="utf-8")) or {}
|
68
|
+
except (OSError, json.JSONDecodeError):
|
69
|
+
return {}
|
@@ -3,62 +3,52 @@
|
|
3
3
|
novel_downloader.utils.crypto_utils.rc4
|
4
4
|
---------------------------------------
|
5
5
|
|
6
|
-
RC4 stream cipher
|
6
|
+
Minimal RC4 stream cipher implementation.
|
7
7
|
"""
|
8
8
|
|
9
|
-
import base64
|
10
9
|
|
11
|
-
|
12
|
-
def rc4_crypt(
|
13
|
-
key: str,
|
14
|
-
data: str,
|
15
|
-
*,
|
16
|
-
mode: str = "encrypt",
|
17
|
-
encoding: str = "utf-8",
|
18
|
-
) -> str:
|
10
|
+
def rc4_init(key: bytes) -> list[int]:
|
19
11
|
"""
|
20
|
-
|
21
|
-
|
22
|
-
:param key: RC4 key (will be encoded using the specified encoding).
|
23
|
-
:param data: Plain-text (for 'encrypt') or Base64 cipher-text (for 'decrypt').
|
24
|
-
:param mode: Operation mode, either 'encrypt' or 'decrypt'. Defaults to 'encrypt'.
|
25
|
-
:param encoding: Character encoding for key and returned string. Defaults 'utf-8'.
|
26
|
-
|
27
|
-
:return: Base64 cipher-text (for encryption) or decoded plain-text (for decryption).
|
28
|
-
|
29
|
-
:raises ValueError: If mode is not 'encrypt' or 'decrypt'.
|
12
|
+
Key-Scheduling Algorithm (KSA)
|
30
13
|
"""
|
14
|
+
S = list(range(256))
|
15
|
+
j = 0
|
16
|
+
klen = len(key)
|
17
|
+
for i in range(256):
|
18
|
+
j = (j + S[i] + key[i % klen]) & 0xFF
|
19
|
+
S[i], S[j] = S[j], S[i]
|
20
|
+
return S
|
31
21
|
|
32
|
-
def _rc4(key_bytes: bytes, data_bytes: bytes) -> bytes:
|
33
|
-
# Key-Scheduling Algorithm (KSA)
|
34
|
-
S = list(range(256))
|
35
|
-
j = 0
|
36
|
-
for i in range(256):
|
37
|
-
j = (j + S[i] + key_bytes[i % len(key_bytes)]) % 256
|
38
|
-
S[i], S[j] = S[j], S[i]
|
39
|
-
|
40
|
-
# Pseudo-Random Generation Algorithm (PRGA)
|
41
|
-
i = j = 0
|
42
|
-
out: list[int] = []
|
43
|
-
for char in data_bytes:
|
44
|
-
i = (i + 1) % 256
|
45
|
-
j = (j + S[i]) % 256
|
46
|
-
S[i], S[j] = S[j], S[i]
|
47
|
-
K = S[(S[i] + S[j]) % 256]
|
48
|
-
out.append(char ^ K)
|
49
|
-
|
50
|
-
return bytes(out)
|
51
22
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
23
|
+
def rc4_stream(S_init: list[int], data: bytes) -> bytes:
|
24
|
+
"""
|
25
|
+
Pseudo-Random Generation Algorithm (PRGA)
|
26
|
+
"""
|
27
|
+
# make a copy of S since it mutates during PRGA
|
28
|
+
S = S_init.copy()
|
29
|
+
i = 0
|
30
|
+
j = 0
|
31
|
+
out = bytearray(len(data))
|
32
|
+
for idx, ch in enumerate(data):
|
33
|
+
i = (i + 1) & 0xFF
|
34
|
+
j = (j + S[i]) & 0xFF
|
35
|
+
S[i], S[j] = S[j], S[i]
|
36
|
+
K = S[(S[i] + S[j]) & 0xFF]
|
37
|
+
out[idx] = ch ^ K
|
38
|
+
|
39
|
+
return bytes(out)
|
40
|
+
|
41
|
+
|
42
|
+
def rc4_cipher(key: bytes, data: bytes) -> bytes:
|
43
|
+
"""
|
44
|
+
RC4 stream cipher.
|
58
45
|
|
59
|
-
|
60
|
-
|
61
|
-
plain_bytes = _rc4(key_bytes, cipher_bytes)
|
62
|
-
return plain_bytes.decode(encoding, errors="replace")
|
46
|
+
It performs the standard Key-Scheduling Algorithm (KSA) and
|
47
|
+
Pseudo-Random Generation Algorithm (PRGA) to produce the RC4 keystream.
|
63
48
|
|
64
|
-
|
49
|
+
:param key: RC4 key as bytes (must not be empty)
|
50
|
+
:param data: plaintext or ciphertext as bytes
|
51
|
+
:return: XORed bytes (encrypt/decrypt are identical)
|
52
|
+
"""
|
53
|
+
S = rc4_init(key)
|
54
|
+
return rc4_stream(S, data)
|
@@ -6,9 +6,8 @@ novel_downloader.utils.epub
|
|
6
6
|
Top-level package for EPUB export utilities.
|
7
7
|
|
8
8
|
Key components:
|
9
|
-
|
10
|
-
|
11
|
-
- Chapter, Volume : represent and render content sections and volume intros
|
9
|
+
* EpubBuilder : orchestrates metadata, manifest, spine, navigation, and resources
|
10
|
+
* Chapter, Volume : represent and render content sections and volume intros
|
12
11
|
|
13
12
|
Usage example:
|
14
13
|
|
@@ -4,14 +4,14 @@ novel_downloader.utils.epub.builder
|
|
4
4
|
-----------------------------------
|
5
5
|
|
6
6
|
Orchestrates the end-to-end EPUB build process by:
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
* Managing metadata (title, author, description, language, etc.)
|
8
|
+
* Collecting and deduplicating resources (chapters, images, stylesheets)
|
9
|
+
* Registering everything in the OPF manifest and spine
|
10
|
+
* Generating nav.xhtml, toc.ncx, content.opf, and the zipped .epub file
|
11
11
|
|
12
12
|
Provides:
|
13
|
-
|
14
|
-
|
13
|
+
* methods to add chapters, volumes, images, and styles
|
14
|
+
* a clean `export()` entry point that writes the final EPUB archive
|
15
15
|
"""
|
16
16
|
|
17
17
|
import zipfile
|
@@ -4,11 +4,11 @@ novel_downloader.utils.epub.constants
|
|
4
4
|
-------------------------------------
|
5
5
|
|
6
6
|
EPUB-specific constants used by the builder, including:
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
* Directory names for OEBPS structure
|
8
|
+
* XML namespace URIs
|
9
|
+
* Package attributes and document-type declarations
|
10
|
+
* Media type mappings for images
|
11
|
+
* Template strings for container.xml and cover image HTML
|
12
12
|
"""
|
13
13
|
|
14
14
|
ROOT_PATH = "OEBPS"
|
@@ -4,9 +4,9 @@ novel_downloader.utils.epub.documents
|
|
4
4
|
-------------------------------------
|
5
5
|
|
6
6
|
Defines the classes that render EPUB navigation and packaging documents:
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
* NavDocument: builds the XHTML nav.xhtml (EPUB 3)
|
8
|
+
* NCXDocument: builds the NCX XML navigation map (EPUB 2)
|
9
|
+
* OpfDocument: builds the content.opf package document
|
10
10
|
"""
|
11
11
|
|
12
12
|
from collections.abc import Sequence
|
@@ -234,10 +234,10 @@ class OpfDocument(EpubResource):
|
|
234
234
|
Generate the content.opf XML, which defines metadata, manifest, and spine.
|
235
235
|
|
236
236
|
This function outputs a complete OPF package document that includes:
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
237
|
+
* <metadata>: title, author, language, identifiers, etc.
|
238
|
+
* <manifest>: all resource entries
|
239
|
+
* <spine>: the reading order of the content
|
240
|
+
* <guide>: optional references like cover page
|
241
241
|
|
242
242
|
:return: A string containing the full OPF XML content.
|
243
243
|
"""
|
@@ -4,14 +4,14 @@ novel_downloader.utils.epub.models
|
|
4
4
|
----------------------------------
|
5
5
|
|
6
6
|
Defines the core EPUB data models and resource classes used by the builder:
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
7
|
+
* Typed entries for table of contents (ChapterEntry, VolumeEntry)
|
8
|
+
* Manifest and spine record types (ManifestEntry, SpineEntry)
|
9
|
+
* Hierarchical NavPoint for NCX navigation
|
10
|
+
* Base resource class (EpubResource) and specializations:
|
11
|
+
* StyleSheet
|
12
|
+
* ImageResource
|
13
|
+
* Chapter (with XHTML serialization)
|
14
|
+
* Volume container for grouping chapters with optional intro and cover
|
15
15
|
"""
|
16
16
|
|
17
17
|
from __future__ import annotations
|
@@ -4,9 +4,9 @@ novel_downloader.utils.epub.utils
|
|
4
4
|
---------------------------------
|
5
5
|
|
6
6
|
Pure utility functions for EPUB assembly, including:
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
* Computing file hashes
|
8
|
+
* Generating META-INF/container.xml
|
9
|
+
* Constructing HTML snippets for the book intro and volume intro
|
10
10
|
"""
|
11
11
|
|
12
12
|
import hashlib
|
@@ -59,9 +59,9 @@ def build_book_intro(
|
|
59
59
|
Build the HTML snippet for the overall book introduction.
|
60
60
|
|
61
61
|
This includes:
|
62
|
-
|
63
|
-
|
64
|
-
|
62
|
+
* A main heading ("Book Introduction")
|
63
|
+
* A list of metadata items (title, author, categories, word count, status)
|
64
|
+
* A "Summary" subheading and one or more paragraphs of summary text
|
65
65
|
|
66
66
|
:return: A HTML string for inclusion in `intro.xhtml`
|
67
67
|
"""
|
@@ -112,10 +112,10 @@ def build_volume_intro(
|
|
112
112
|
Build the HTML snippet for a single-volume introduction.
|
113
113
|
|
114
114
|
This includes:
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
115
|
+
* A decorative border image (top and bottom)
|
116
|
+
* A primary heading (volume main title)
|
117
|
+
* An optional secondary line (subtitle)
|
118
|
+
* One or more paragraphs of intro text
|
119
119
|
|
120
120
|
:param volume_title: e.g. "Volume 1 - The Beginning"
|
121
121
|
:param volume_intro_text: multiline intro text for this volume
|
@@ -8,99 +8,74 @@ File I/O utilities for reading and writing data.
|
|
8
8
|
|
9
9
|
__all__ = ["write_file"]
|
10
10
|
|
11
|
-
import json
|
12
|
-
import logging
|
13
11
|
import tempfile
|
14
12
|
from pathlib import Path
|
15
|
-
from typing import
|
13
|
+
from typing import Literal
|
16
14
|
|
17
15
|
from .sanitize import sanitize_filename
|
18
16
|
|
19
|
-
logger = logging.getLogger(__name__)
|
20
17
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
def _get_non_conflicting_path(path: Path) -> Path:
|
18
|
+
def _unique_path(path: Path, max_tries: int = 100) -> Path:
|
25
19
|
"""
|
26
|
-
|
20
|
+
Return a unique file path by appending _1, _2, ... if needed.
|
21
|
+
|
22
|
+
Falls back to a UUID suffix if all attempts fail.
|
27
23
|
"""
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
24
|
+
if not path.exists():
|
25
|
+
return path
|
26
|
+
|
27
|
+
stem = path.stem
|
28
|
+
suffix = path.suffix
|
29
|
+
|
30
|
+
for counter in range(1, max_tries + 1):
|
31
|
+
candidate = path.with_name(f"{stem}_{counter}{suffix}")
|
32
|
+
if not candidate.exists():
|
33
|
+
return candidate
|
34
|
+
|
35
|
+
# fallback: append a random/unique suffix
|
36
|
+
import uuid
|
37
|
+
|
38
|
+
return path.with_name(f"{stem}_{uuid.uuid4().hex}{suffix}")
|
36
39
|
|
37
40
|
|
38
41
|
def write_file(
|
39
|
-
content: str | bytes
|
42
|
+
content: str | bytes,
|
40
43
|
filepath: str | Path,
|
41
|
-
write_mode: str = "w",
|
42
44
|
*,
|
43
45
|
on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
|
44
|
-
dump_json: bool = False,
|
45
46
|
encoding: str = "utf-8",
|
46
|
-
) -> Path
|
47
|
+
) -> Path:
|
47
48
|
"""
|
48
|
-
Write content to a file safely with
|
49
|
-
|
50
|
-
|
51
|
-
:param
|
52
|
-
|
53
|
-
:param filepath: Destination path (str or Path).
|
54
|
-
:param mode: File mode ('w', 'wb'). Auto-determined if None.
|
55
|
-
:param on_exist: Behavior if file exists: 'overwrite', 'skip',
|
56
|
-
or 'rename'.
|
57
|
-
:param dump_json: If True, serialize content as JSON.
|
49
|
+
Write content to a file safely with atomic replacement.
|
50
|
+
|
51
|
+
:param content: The content to write; can be text or bytes.
|
52
|
+
:param filepath: Destination path.
|
53
|
+
:param on_exist: Behavior if file exists.
|
58
54
|
:param encoding: Text encoding for writing.
|
59
|
-
:return:
|
55
|
+
:return: The final path where the content was written.
|
56
|
+
:raise: Any I/O error such as PermissionError or OSError
|
60
57
|
"""
|
61
58
|
path = Path(filepath)
|
62
59
|
path = path.with_name(sanitize_filename(path.name))
|
63
60
|
path.parent.mkdir(parents=True, exist_ok=True)
|
64
61
|
|
65
62
|
if path.exists():
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
if isinstance(content, (str | bytes)):
|
86
|
-
content_to_write = content
|
87
|
-
else:
|
88
|
-
raise TypeError("Non-JSON content must be str or bytes.")
|
89
|
-
write_mode = "wb" if isinstance(content, bytes) else "w"
|
90
|
-
|
91
|
-
try:
|
92
|
-
with tempfile.NamedTemporaryFile(
|
93
|
-
mode=write_mode,
|
94
|
-
encoding=None if "b" in write_mode else encoding,
|
95
|
-
newline=None if "b" in write_mode else "\n",
|
96
|
-
delete=False,
|
97
|
-
dir=path.parent,
|
98
|
-
) as tmp:
|
99
|
-
tmp.write(content_to_write)
|
100
|
-
tmp_path = Path(tmp.name)
|
101
|
-
tmp_path.replace(path)
|
102
|
-
logger.debug("[file] '%s' written successfully", path)
|
103
|
-
return path
|
104
|
-
except Exception as exc:
|
105
|
-
logger.warning("[file] Error writing %r: %s", path, exc)
|
106
|
-
return None
|
63
|
+
match on_exist:
|
64
|
+
case "skip":
|
65
|
+
return path
|
66
|
+
case "rename":
|
67
|
+
path = _unique_path(path)
|
68
|
+
|
69
|
+
write_mode = "wb" if isinstance(content, bytes) else "w"
|
70
|
+
|
71
|
+
with tempfile.NamedTemporaryFile(
|
72
|
+
mode=write_mode,
|
73
|
+
encoding=None if "b" in write_mode else encoding,
|
74
|
+
newline=None if "b" in write_mode else "\n",
|
75
|
+
delete=False,
|
76
|
+
dir=path.parent,
|
77
|
+
) as tmp:
|
78
|
+
tmp.write(content)
|
79
|
+
tmp_path = Path(tmp.name)
|
80
|
+
tmp_path.replace(path)
|
81
|
+
return path
|