novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -2
- novel_downloader/cli/config.py +1 -83
- novel_downloader/cli/download.py +4 -5
- novel_downloader/cli/export.py +4 -1
- novel_downloader/cli/main.py +2 -0
- novel_downloader/cli/search.py +123 -0
- novel_downloader/config/__init__.py +3 -10
- novel_downloader/config/adapter.py +190 -54
- novel_downloader/config/loader.py +2 -3
- novel_downloader/core/__init__.py +13 -13
- novel_downloader/core/downloaders/__init__.py +10 -11
- novel_downloader/core/downloaders/base.py +152 -26
- novel_downloader/core/downloaders/biquge.py +5 -1
- novel_downloader/core/downloaders/common.py +157 -378
- novel_downloader/core/downloaders/esjzone.py +5 -1
- novel_downloader/core/downloaders/linovelib.py +5 -1
- novel_downloader/core/downloaders/qianbi.py +291 -4
- novel_downloader/core/downloaders/qidian.py +199 -285
- novel_downloader/core/downloaders/registry.py +67 -0
- novel_downloader/core/downloaders/sfacg.py +5 -1
- novel_downloader/core/downloaders/yamibo.py +5 -1
- novel_downloader/core/exporters/__init__.py +10 -11
- novel_downloader/core/exporters/base.py +87 -7
- novel_downloader/core/exporters/biquge.py +5 -8
- novel_downloader/core/exporters/common/__init__.py +2 -2
- novel_downloader/core/exporters/common/epub.py +82 -166
- novel_downloader/core/exporters/common/main_exporter.py +0 -60
- novel_downloader/core/exporters/common/txt.py +82 -83
- novel_downloader/core/exporters/epub_util.py +157 -1330
- novel_downloader/core/exporters/esjzone.py +5 -8
- novel_downloader/core/exporters/linovelib/__init__.py +2 -2
- novel_downloader/core/exporters/linovelib/epub.py +157 -212
- novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
- novel_downloader/core/exporters/linovelib/txt.py +67 -63
- novel_downloader/core/exporters/qianbi.py +5 -8
- novel_downloader/core/exporters/qidian.py +14 -4
- novel_downloader/core/exporters/registry.py +53 -0
- novel_downloader/core/exporters/sfacg.py +5 -8
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/exporters/yamibo.py +5 -8
- novel_downloader/core/fetchers/__init__.py +19 -24
- novel_downloader/core/fetchers/base/__init__.py +3 -3
- novel_downloader/core/fetchers/base/browser.py +23 -4
- novel_downloader/core/fetchers/base/session.py +30 -5
- novel_downloader/core/fetchers/biquge/__init__.py +3 -3
- novel_downloader/core/fetchers/biquge/browser.py +5 -0
- novel_downloader/core/fetchers/biquge/session.py +6 -1
- novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
- novel_downloader/core/fetchers/esjzone/browser.py +5 -0
- novel_downloader/core/fetchers/esjzone/session.py +6 -1
- novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
- novel_downloader/core/fetchers/linovelib/browser.py +6 -1
- novel_downloader/core/fetchers/linovelib/session.py +6 -1
- novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
- novel_downloader/core/fetchers/qianbi/browser.py +5 -0
- novel_downloader/core/fetchers/qianbi/session.py +5 -0
- novel_downloader/core/fetchers/qidian/__init__.py +3 -3
- novel_downloader/core/fetchers/qidian/browser.py +12 -4
- novel_downloader/core/fetchers/qidian/session.py +11 -3
- novel_downloader/core/fetchers/registry.py +71 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
- novel_downloader/core/fetchers/sfacg/browser.py +5 -0
- novel_downloader/core/fetchers/sfacg/session.py +5 -0
- novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
- novel_downloader/core/fetchers/yamibo/browser.py +5 -0
- novel_downloader/core/fetchers/yamibo/session.py +6 -1
- novel_downloader/core/interfaces/__init__.py +7 -5
- novel_downloader/core/interfaces/searcher.py +18 -0
- novel_downloader/core/parsers/__init__.py +10 -11
- novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
- novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
- novel_downloader/core/parsers/qidian/main_parser.py +10 -21
- novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/registry.py +68 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
- novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
- novel_downloader/core/searchers/__init__.py +20 -0
- novel_downloader/core/searchers/base.py +92 -0
- novel_downloader/core/searchers/biquge.py +83 -0
- novel_downloader/core/searchers/esjzone.py +84 -0
- novel_downloader/core/searchers/qianbi.py +131 -0
- novel_downloader/core/searchers/qidian.py +87 -0
- novel_downloader/core/searchers/registry.py +63 -0
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +12 -4
- novel_downloader/models/__init__.py +4 -30
- novel_downloader/models/config.py +12 -6
- novel_downloader/models/search.py +16 -0
- novel_downloader/models/types.py +0 -2
- novel_downloader/resources/config/settings.toml +31 -4
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/utils/__init__.py +52 -0
- novel_downloader/utils/chapter_storage.py +244 -224
- novel_downloader/utils/constants.py +1 -21
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +77 -0
- novel_downloader/utils/epub/documents.py +403 -0
- novel_downloader/utils/epub/models.py +134 -0
- novel_downloader/utils/epub/utils.py +212 -0
- novel_downloader/utils/file_utils/__init__.py +10 -14
- novel_downloader/utils/file_utils/io.py +20 -51
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -3
- novel_downloader/utils/fontocr/__init__.py +5 -5
- novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
- novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +13 -1
- novel_downloader/utils/fontocr/ocr_v2.py +13 -1
- novel_downloader/utils/fontocr/ocr_v3.py +744 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +2 -0
- novel_downloader/utils/network.py +110 -251
- novel_downloader/utils/state.py +1 -0
- novel_downloader/utils/text_utils/__init__.py +18 -17
- novel_downloader/utils/text_utils/diff_display.py +4 -5
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +3 -3
- novel_downloader/utils/time_utils/datetime_utils.py +4 -5
- novel_downloader/utils/time_utils/sleep_utils.py +2 -3
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
- novel_downloader-1.5.0.dist-info/RECORD +164 -0
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/common/browser.py +0 -79
- novel_downloader/core/fetchers/common/session.py +0 -79
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.4.dist-info/RECORD +0 -165
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,212 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.utils.epub.utils
|
4
|
+
---------------------------------
|
5
|
+
|
6
|
+
Pure utility functions for EPUB assembly, including:
|
7
|
+
- Computing file hashes
|
8
|
+
- Generating META-INF/container.xml
|
9
|
+
- Constructing HTML snippets for the book intro and volume intro
|
10
|
+
"""
|
11
|
+
|
12
|
+
import hashlib
|
13
|
+
from pathlib import Path
|
14
|
+
|
15
|
+
from lxml import etree, html
|
16
|
+
|
17
|
+
from .constants import (
|
18
|
+
CONTAINER_TEMPLATE,
|
19
|
+
IMAGE_FOLDER,
|
20
|
+
PRETTY_PRINT_FLAG,
|
21
|
+
ROOT_PATH,
|
22
|
+
)
|
23
|
+
|
24
|
+
|
25
|
+
def hash_file(file_path: Path, chunk_size: int = 8192) -> str:
|
26
|
+
"""
|
27
|
+
Compute the SHA256 hash of a file.
|
28
|
+
|
29
|
+
:param file_path: The Path object of the file to hash.
|
30
|
+
:param chunk_size: The chunk size to read the file (default: 8192).
|
31
|
+
:return: The SHA256 hash string (lowercase hex) of the file content.
|
32
|
+
"""
|
33
|
+
h = hashlib.sha256()
|
34
|
+
with file_path.open("rb") as f:
|
35
|
+
while chunk := f.read(chunk_size):
|
36
|
+
h.update(chunk)
|
37
|
+
return h.hexdigest()
|
38
|
+
|
39
|
+
|
40
|
+
def build_container_xml(
    root_path: str = ROOT_PATH,
) -> str:
    """
    Render the XML text for META-INF/container.xml of an EPUB archive.

    The result is produced by filling the ``root_path`` placeholder of
    ``CONTAINER_TEMPLATE``.

    :param root_path: The folder where the OPF file is stored.
    :return: The complete container.xml content as a string.
    """
    container_xml = CONTAINER_TEMPLATE.format(root_path=root_path)
    return container_xml
|
50
|
+
|
51
|
+
|
52
|
+
def build_book_intro(
    book_name: str,
    author: str,
    serial_status: str,
    subject: list[str],
    word_count: str,
    summary: str,
) -> str:
    """
    Build the HTML snippet for the overall book introduction.

    The snippet is a single ``<div>`` holding:
      - an ``<h1>`` main heading
      - a metadata list (title, author, categories, word count, status);
        entries whose value is empty are left out
      - when ``summary`` is non-empty: a page-break marker paragraph,
        an ``<h2>`` subheading and one ``<p>`` per non-blank summary line

    :param book_name: Book title; wrapped in 《》 when non-empty.
    :param author: Author name.
    :param serial_status: Serialization status text.
    :param subject: Category names; joined with ", ".
    :param word_count: Word count text.
    :param summary: Multi-line summary text.
    :return: A HTML string for inclusion in `intro.xhtml`
    """
    container = html.Element("div")

    # Main heading
    etree.SubElement(container, "h1").text = "书籍简介"

    # Metadata list: gather (label, value) pairs first, then emit them in order
    info_block = etree.SubElement(container, "div", {"class": "intro-info"})
    meta_list = etree.SubElement(info_block, "ul")
    title_text = f"《{book_name}》" if book_name else ""
    categories = ", ".join(subject) if subject else ""
    metadata = (
        ("书名", title_text),
        ("作者", author),
        ("分类", categories),
        ("字数", word_count),
        ("状态", serial_status),
    )
    for label, value in metadata:
        _add_li(meta_list, label, value)

    # Summary section
    if summary:
        # Empty marker paragraph; its class is what triggers the page break
        etree.SubElement(container, "p", {"class": "new-page-after"})
        etree.SubElement(container, "h2").text = "简介"

        summary_block = etree.SubElement(
            container, "div", {"class": "intro-summary"}
        )
        stripped_lines = (raw.strip() for raw in summary.splitlines())
        for paragraph in filter(None, stripped_lines):
            etree.SubElement(summary_block, "p").text = paragraph

    rendered: str = html.tostring(
        container,
        pretty_print=PRETTY_PRINT_FLAG,
        encoding="unicode",
    )
    return rendered
|
106
|
+
|
107
|
+
|
108
|
+
def build_volume_intro(
    volume_title: str,
    volume_intro_text: str = "",
) -> str:
    """
    Build the HTML snippet for a single-volume introduction.

    The snippet is a single ``<div>`` holding:
      - a header block with a border image, the main title (``<h1>``),
        a second (flipped) border image and, if the title could be split,
        a subtitle (``<h2>``)
      - when ``volume_intro_text`` is non-empty: a page-break marker
        paragraph followed by one ``<p>`` per non-blank line of intro text

    :param volume_title: Full volume title; split into main title / subtitle.
    :param volume_intro_text: Multi-line intro text for this volume.
    :return: A HTML string for inclusion in `vol_<n>.xhtml`
    """
    # Break the title into a main line and an optional subtitle
    main_title, sub_title = _split_volume_title(volume_title)

    container = html.Element("div")
    header_block = etree.SubElement(container, "div", {"class": "vol-header"})

    # Border / main title / flipped border, in document order
    header_block.append(_make_vol_border_img(flip=False))
    title_el = etree.SubElement(header_block, "h1", {"class": "vol-title-main"})
    title_el.text = main_title
    header_block.append(_make_vol_border_img(flip=True))

    # The subtitle goes after the bottom border
    if sub_title:
        subtitle_el = etree.SubElement(
            header_block, "h2", {"class": "vol-title-sub"}
        )
        subtitle_el.text = sub_title

    # Intro text paragraphs
    if volume_intro_text:
        etree.SubElement(container, "p", {"class": "new-page-after"})
        text_block = etree.SubElement(
            container, "div", {"class": "vol-intro-text"}
        )
        stripped_lines = (raw.strip() for raw in volume_intro_text.splitlines())
        for paragraph in filter(None, stripped_lines):
            etree.SubElement(text_block, "p").text = paragraph

    rendered: str = html.tostring(
        container,
        pretty_print=PRETTY_PRINT_FLAG,
        encoding="unicode",
    )
    return rendered
|
165
|
+
|
166
|
+
|
167
|
+
def _add_li(ul: etree._Element, label: str, value: str) -> None:
    """
    Append a `<li>` reading 'label: value' to `ul`.

    Nothing is appended when `value` is empty.

    :param ul: Parent list element that receives the new item.
    :param label: Text placed before the colon.
    :param value: Text placed after the colon.
    """
    if not value:
        return
    item = etree.SubElement(ul, "li")
    item.text = f"{label}: {value}"
|
174
|
+
|
175
|
+
|
176
|
+
def _make_vol_border_img(flip: bool = False) -> html.HtmlElement:
    """
    Return a `<div>` wrapping the `volume_border.png` image.

    :param flip: When True the wrapper gets the extra ``flip`` CSS class
                 in addition to ``vol-border``.
    :return: The wrapper element with a single `<img>` child.
    """
    css_class = "vol-border flip" if flip else "vol-border"
    wrapper = html.Element("div", {"class": css_class})

    img_attrs = {
        "src": f"../{IMAGE_FOLDER}/volume_border.png",
        "alt": "",
    }
    etree.SubElement(wrapper, "img", img_attrs)
    return wrapper
|
196
|
+
|
197
|
+
|
198
|
+
def _split_volume_title(volume_title: str) -> tuple[str, str]:
|
199
|
+
"""
|
200
|
+
Split volume title into two parts for better display.
|
201
|
+
|
202
|
+
:param volume_title: Original volume title string.
|
203
|
+
:return: Tuple of (line1, line2)
|
204
|
+
"""
|
205
|
+
if " " in volume_title:
|
206
|
+
parts = volume_title.split(" ", 1)
|
207
|
+
elif "-" in volume_title:
|
208
|
+
parts = volume_title.split("-", 1)
|
209
|
+
else:
|
210
|
+
return volume_title, ""
|
211
|
+
|
212
|
+
return parts[0], parts[1]
|
@@ -17,18 +17,6 @@ Included utilities:
|
|
17
17
|
- read_text_file / read_json_file / read_binary_file: load content from file
|
18
18
|
"""
|
19
19
|
|
20
|
-
from .io import (
|
21
|
-
load_blacklisted_words,
|
22
|
-
load_text_resource,
|
23
|
-
read_binary_file,
|
24
|
-
read_json_file,
|
25
|
-
read_text_file,
|
26
|
-
save_as_json,
|
27
|
-
save_as_txt,
|
28
|
-
)
|
29
|
-
from .normalize import normalize_txt_line_endings
|
30
|
-
from .sanitize import sanitize_filename
|
31
|
-
|
32
20
|
__all__ = [
|
33
21
|
"sanitize_filename",
|
34
22
|
"save_as_json",
|
@@ -36,7 +24,15 @@ __all__ = [
|
|
36
24
|
"read_text_file",
|
37
25
|
"read_json_file",
|
38
26
|
"read_binary_file",
|
39
|
-
"load_text_resource",
|
40
|
-
"load_blacklisted_words",
|
41
27
|
"normalize_txt_line_endings",
|
42
28
|
]
|
29
|
+
|
30
|
+
from .io import (
|
31
|
+
read_binary_file,
|
32
|
+
read_json_file,
|
33
|
+
read_text_file,
|
34
|
+
save_as_json,
|
35
|
+
save_as_txt,
|
36
|
+
)
|
37
|
+
from .normalize import normalize_txt_line_endings
|
38
|
+
from .sanitize import sanitize_filename
|
@@ -11,10 +11,17 @@ Includes:
|
|
11
11
|
- Simple helpers for reading files with fallback and logging
|
12
12
|
"""
|
13
13
|
|
14
|
+
__all__ = [
|
15
|
+
"save_as_txt",
|
16
|
+
"save_as_json",
|
17
|
+
"read_text_file",
|
18
|
+
"read_json_file",
|
19
|
+
"read_binary_file",
|
20
|
+
]
|
21
|
+
|
14
22
|
import json
|
15
23
|
import logging
|
16
24
|
import tempfile
|
17
|
-
from importlib.resources import files
|
18
25
|
from pathlib import Path
|
19
26
|
from typing import Any, Literal
|
20
27
|
|
@@ -42,12 +49,12 @@ def _get_non_conflicting_path(path: Path) -> Path:
|
|
42
49
|
def _write_file(
|
43
50
|
content: str | bytes | dict[Any, Any] | list[Any] | Any,
|
44
51
|
filepath: str | Path,
|
45
|
-
|
52
|
+
write_mode: str = "w",
|
46
53
|
*,
|
47
54
|
on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
|
48
55
|
dump_json: bool = False,
|
49
56
|
encoding: str = "utf-8",
|
50
|
-
) ->
|
57
|
+
) -> Path | None:
|
51
58
|
"""
|
52
59
|
Write content to a file safely with optional atomic behavior
|
53
60
|
and JSON serialization.
|
@@ -60,7 +67,7 @@ def _write_file(
|
|
60
67
|
or 'rename'.
|
61
68
|
:param dump_json: If True, serialize content as JSON.
|
62
69
|
:param encoding: Text encoding for writing.
|
63
|
-
:return:
|
70
|
+
:return: Path if writing succeeds, None otherwise.
|
64
71
|
"""
|
65
72
|
path = Path(filepath)
|
66
73
|
path = path.with_name(sanitize_filename(path.name))
|
@@ -69,7 +76,7 @@ def _write_file(
|
|
69
76
|
if path.exists():
|
70
77
|
if on_exist == "skip":
|
71
78
|
logger.debug("[file] '%s' exists, skipping", path)
|
72
|
-
return
|
79
|
+
return path
|
73
80
|
if on_exist == "rename":
|
74
81
|
path = _get_non_conflicting_path(path)
|
75
82
|
logger.debug("[file] Renaming target to avoid conflict: %s", path)
|
@@ -104,10 +111,10 @@ def _write_file(
|
|
104
111
|
tmp_path = Path(tmp.name)
|
105
112
|
tmp_path.replace(path)
|
106
113
|
logger.debug("[file] '%s' written successfully", path)
|
107
|
-
return
|
114
|
+
return path
|
108
115
|
except Exception as exc:
|
109
116
|
logger.warning("[file] Error writing %r: %s", path, exc)
|
110
|
-
return
|
117
|
+
return None
|
111
118
|
|
112
119
|
|
113
120
|
def save_as_txt(
|
@@ -116,7 +123,7 @@ def save_as_txt(
|
|
116
123
|
*,
|
117
124
|
encoding: str = "utf-8",
|
118
125
|
on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
|
119
|
-
) ->
|
126
|
+
) -> Path | None:
|
120
127
|
"""
|
121
128
|
Save plain text content to the given file path.
|
122
129
|
|
@@ -124,12 +131,12 @@ def save_as_txt(
|
|
124
131
|
:param filepath: Destination file path.
|
125
132
|
:param encoding: Text encoding to use (default: 'utf-8').
|
126
133
|
:param on_exist: How to handle existing files: 'overwrite', 'skip', or 'rename'.
|
127
|
-
:return:
|
134
|
+
:return: Path if writing succeeds, None otherwise.
|
128
135
|
"""
|
129
136
|
return _write_file(
|
130
137
|
content=content,
|
131
138
|
filepath=filepath,
|
132
|
-
|
139
|
+
write_mode="w",
|
133
140
|
on_exist=on_exist,
|
134
141
|
dump_json=False,
|
135
142
|
encoding=encoding,
|
@@ -142,7 +149,7 @@ def save_as_json(
|
|
142
149
|
*,
|
143
150
|
encoding: str = "utf-8",
|
144
151
|
on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
|
145
|
-
) ->
|
152
|
+
) -> Path | None:
|
146
153
|
"""
|
147
154
|
Save JSON-serializable content to the given file path.
|
148
155
|
|
@@ -150,12 +157,12 @@ def save_as_json(
|
|
150
157
|
:param filepath: Destination file path.
|
151
158
|
:param encoding: Text encoding to use (default: 'utf-8').
|
152
159
|
:param on_exist: How to handle existing files: 'overwrite', 'skip', or 'rename'.
|
153
|
-
:return:
|
160
|
+
:return: Path if writing succeeds, None otherwise.
|
154
161
|
"""
|
155
162
|
return _write_file(
|
156
163
|
content=content,
|
157
164
|
filepath=filepath,
|
158
|
-
|
165
|
+
write_mode="w",
|
159
166
|
on_exist=on_exist,
|
160
167
|
dump_json=True,
|
161
168
|
encoding=encoding,
|
@@ -207,41 +214,3 @@ def read_binary_file(filepath: str | Path) -> bytes | None:
|
|
207
214
|
except Exception as e:
|
208
215
|
logger.warning("[file] Failed to read %r: %s", path, e)
|
209
216
|
return None
|
210
|
-
|
211
|
-
|
212
|
-
def load_text_resource(
|
213
|
-
filename: str,
|
214
|
-
package: str = "novel_downloader.resources.text",
|
215
|
-
) -> str:
|
216
|
-
"""
|
217
|
-
Load and return the contents of a text resource.
|
218
|
-
|
219
|
-
:param filename: Name of the text file (e.g. "blacklist.txt").
|
220
|
-
:param package: Package path where resources live (default: text resources).
|
221
|
-
For other resource types, point to the appropriate subpackage
|
222
|
-
(e.g. "novel_downloader.resources.css").
|
223
|
-
:return: File contents as a string.
|
224
|
-
"""
|
225
|
-
resource_path = files(package).joinpath(filename)
|
226
|
-
return resource_path.read_text(encoding="utf-8")
|
227
|
-
|
228
|
-
|
229
|
-
def load_blacklisted_words() -> set[str]:
|
230
|
-
"""
|
231
|
-
Convenience loader for the blacklist.txt in the text resources.
|
232
|
-
|
233
|
-
:return: A set of non-empty, stripped lines from blacklist.txt.
|
234
|
-
"""
|
235
|
-
text = load_text_resource("blacklist.txt")
|
236
|
-
return {line.strip() for line in text.splitlines() if line.strip()}
|
237
|
-
|
238
|
-
|
239
|
-
__all__ = [
|
240
|
-
"save_as_txt",
|
241
|
-
"save_as_json",
|
242
|
-
"read_text_file",
|
243
|
-
"read_json_file",
|
244
|
-
"read_binary_file",
|
245
|
-
"load_text_resource",
|
246
|
-
"load_blacklisted_words",
|
247
|
-
]
|
@@ -9,6 +9,8 @@ across platforms or output formats.
|
|
9
9
|
Currently includes line-ending normalization for .txt files.
|
10
10
|
"""
|
11
11
|
|
12
|
+
__all__ = ["normalize_txt_line_endings"]
|
13
|
+
|
12
14
|
import logging
|
13
15
|
from pathlib import Path
|
14
16
|
|
@@ -46,8 +48,6 @@ def normalize_txt_line_endings(folder_path: str | Path) -> None:
|
|
46
48
|
return
|
47
49
|
|
48
50
|
|
49
|
-
__all__ = ["normalize_txt_line_endings"]
|
50
|
-
|
51
51
|
if __name__ == "__main__": # pragma: no cover
|
52
52
|
import argparse
|
53
53
|
|
@@ -11,6 +11,8 @@ that replaces or removes illegal characters from filenames, trims
|
|
11
11
|
lengths, and avoids reserved names on Windows systems.
|
12
12
|
"""
|
13
13
|
|
14
|
+
__all__ = ["sanitize_filename"]
|
15
|
+
|
14
16
|
import logging
|
15
17
|
import os
|
16
18
|
import re
|
@@ -65,6 +67,3 @@ def sanitize_filename(filename: str, max_length: int | None = 255) -> str:
|
|
65
67
|
cleaned = "_untitled"
|
66
68
|
logger.debug("[file] Sanitized filename: %r -> %r", filename, cleaned)
|
67
69
|
return cleaned
|
68
|
-
|
69
|
-
|
70
|
-
__all__ = ["sanitize_filename"]
|
@@ -14,9 +14,9 @@ Supports:
|
|
14
14
|
Exposes the selected OCR engine version via `FontOCR`.
|
15
15
|
"""
|
16
16
|
|
17
|
-
# from .ocr_v1 import FontOCRV1 as FontOCR
|
18
|
-
from .ocr_v2 import FontOCRV2 as FontOCR
|
19
|
-
|
20
|
-
__version__ = "v2"
|
21
|
-
|
22
17
|
__all__ = ["FontOCR"]
|
18
|
+
__version__ = "3.0"
|
19
|
+
|
20
|
+
# from .ocr_v1 import FontOCRV1 as FontOCR
|
21
|
+
# from .ocr_v2 import FontOCRV2 as FontOCR
|
22
|
+
from .ocr_v3 import FontOCRV3 as FontOCR
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.utils.hash_store
|
4
|
-
|
3
|
+
novel_downloader.utils.fontocr.hash_store
|
4
|
+
-----------------------------------------
|
5
5
|
|
6
6
|
Manage a small collection of image perceptual hashes and their labels.
|
7
7
|
Supports loading/saving to .json or .npy, and basic CRUD + search.
|
@@ -15,10 +15,11 @@ from pathlib import Path
|
|
15
15
|
|
16
16
|
from PIL import Image
|
17
17
|
|
18
|
-
from
|
18
|
+
from ..constants import DATA_DIR
|
19
19
|
from .hash_utils import HASH_DISTANCE_THRESHOLD, fast_hamming_distance, phash
|
20
20
|
|
21
21
|
logger = logging.getLogger(__name__)
|
22
|
+
HASH_STORE_FILE = DATA_DIR / "image_hashes.json"
|
22
23
|
|
23
24
|
|
24
25
|
class _BKNode:
|
@@ -24,8 +24,8 @@ from novel_downloader.utils.constants import (
|
|
24
24
|
REC_CHAR_MODEL_FILES,
|
25
25
|
REC_IMAGE_SHAPE_MAP,
|
26
26
|
)
|
27
|
-
from novel_downloader.utils.hash_store import img_hash_store
|
28
27
|
|
28
|
+
from .hash_store import img_hash_store
|
29
29
|
from .model_loader import get_rec_chinese_char_model_dir
|
30
30
|
|
31
31
|
logger = logging.getLogger(__name__)
|
@@ -301,3 +301,15 @@ class FontOCRV1:
|
|
301
301
|
logger.error("[FontOCR] Failed to save fixed map: %s", e)
|
302
302
|
|
303
303
|
return mapping_result
|
304
|
+
|
305
|
+
@staticmethod
|
306
|
+
def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
    """
    Translate `text` character by character through `font_map`.

    Characters that have no entry in `font_map` are kept as they are.

    :param text: The input string, possibly containing obfuscated font chars.
    :param font_map: A dict mapping obfuscated chars to real chars.
    :return: The de-obfuscated text.
    """
    lookup = font_map.get
    decoded = [lookup(char, char) for char in text]
    return "".join(decoded)
|
@@ -35,8 +35,8 @@ from novel_downloader.utils.constants import (
|
|
35
35
|
REC_CHAR_MODEL_FILES,
|
36
36
|
REC_IMAGE_SHAPE_MAP,
|
37
37
|
)
|
38
|
-
from novel_downloader.utils.hash_store import img_hash_store
|
39
38
|
|
39
|
+
from .hash_store import img_hash_store
|
40
40
|
from .model_loader import (
|
41
41
|
get_rec_char_vector_dir,
|
42
42
|
get_rec_chinese_char_model_dir,
|
@@ -750,3 +750,15 @@ class FontOCRV2:
|
|
750
750
|
logger.error("[FontOCR] Failed to save fixed map: %s", e)
|
751
751
|
|
752
752
|
return mapping_result
|
753
|
+
|
754
|
+
@staticmethod
|
755
|
+
def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
    """
    Translate `text` character by character through `font_map`.

    Characters that have no entry in `font_map` are kept as they are.

    :param text: The input string, possibly containing obfuscated font chars.
    :param font_map: A dict mapping obfuscated chars to real chars.
    :return: The de-obfuscated text.
    """
    lookup = font_map.get
    decoded = [lookup(char, char) for char in text]
    return "".join(decoded)
|