novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.utils.epub.utils
4
+ ---------------------------------
5
+
6
+ Pure utility functions for EPUB assembly, including:
7
+ - Computing file hashes
8
+ - Generating META-INF/container.xml
9
+ - Constructing HTML snippets for the book intro and volume intro
10
+ """
11
+
12
+ import hashlib
13
+ from pathlib import Path
14
+
15
+ from lxml import etree, html
16
+
17
+ from .constants import (
18
+ CONTAINER_TEMPLATE,
19
+ IMAGE_FOLDER,
20
+ PRETTY_PRINT_FLAG,
21
+ ROOT_PATH,
22
+ )
23
+
24
+
25
def hash_file(file_path: Path, chunk_size: int = 8192) -> str:
    """
    Return the SHA256 digest of a file's contents.

    The file is opened in binary mode and fed to the hasher in pieces
    of ``chunk_size`` bytes.

    :param file_path: Path of the file to hash.
    :param chunk_size: Number of bytes requested per read (default: 8192).
    :return: Lowercase hexadecimal SHA256 digest of the file content.
    """
    digest = hashlib.sha256()
    with file_path.open("rb") as stream:
        # The two-argument form of iter() keeps calling read() until it
        # returns the sentinel b"", i.e. until end of file.
        for block in iter(lambda: stream.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()
38
+
39
+
40
def build_container_xml(
    root_path: str = ROOT_PATH,
) -> str:
    """
    Render the XML document stored at META-INF/container.xml of an EPUB.

    The result is CONTAINER_TEMPLATE with its ``root_path`` placeholder
    filled in.

    :param root_path: The folder where the OPF file is stored
                      (defaults to ROOT_PATH).
    :return: The complete container.xml content as a string.
    """
    container_xml: str = CONTAINER_TEMPLATE.format(root_path=root_path)
    return container_xml
50
+
51
+
52
def build_book_intro(
    book_name: str,
    author: str,
    serial_status: str,
    subject: list[str],
    word_count: str,
    summary: str,
) -> str:
    """
    Assemble the HTML snippet that introduces the whole book.

    The snippet is a single ``<div>`` holding:
      - an ``<h1>`` main heading
      - a metadata list (title, author, categories, word count, status);
        entries whose value is empty are left out
      - when ``summary`` is non-empty: a page-break marker paragraph,
        an ``<h2>`` subheading and one ``<p>`` per non-blank summary line

    :param book_name: Book title; wrapped in 《》 when non-empty.
    :param author: Author name.
    :param serial_status: Serialization status text.
    :param subject: Category names, joined with ", " for display.
    :param word_count: Word count text.
    :param summary: Multiline summary text.
    :return: An HTML string for inclusion in `intro.xhtml`
    """
    container = html.Element("div")

    # Main heading
    etree.SubElement(container, "h1").text = "书籍简介"

    # Metadata list: each (label, value) pair goes through _add_li
    info_box = etree.SubElement(container, "div", {"class": "intro-info"})
    meta_list = etree.SubElement(info_box, "ul")
    metadata = (
        ("书名", f"《{book_name}》" if book_name else ""),
        ("作者", author),
        ("分类", ", ".join(subject) if subject else ""),
        ("字数", word_count),
        ("状态", serial_status),
    )
    for label, value in metadata:
        _add_li(meta_list, label, value)

    # Summary section
    if summary:
        # Empty marker paragraph used to force a page break before the summary
        etree.SubElement(container, "p", {"class": "new-page-after"})
        etree.SubElement(container, "h2").text = "简介"

        summary_box = etree.SubElement(container, "div", {"class": "intro-summary"})
        stripped_lines = (raw.strip() for raw in summary.splitlines())
        # filter(None, ...) drops the lines that are empty after stripping
        for paragraph in filter(None, stripped_lines):
            etree.SubElement(summary_box, "p").text = paragraph

    rendered: str = html.tostring(
        container,
        pretty_print=PRETTY_PRINT_FLAG,
        encoding="unicode",
    )
    return rendered
106
+
107
+
108
def build_volume_intro(
    volume_title: str,
    volume_intro_text: str = "",
) -> str:
    """
    Assemble the HTML snippet that introduces a single volume.

    The snippet is a single ``<div>`` holding:
      - a header with a border image, the main title (``<h1>``),
        a second (flipped) border image and, when the title could be
        split, a subtitle (``<h2>``)
      - when ``volume_intro_text`` is non-empty: a page-break marker
        paragraph and one ``<p>`` per non-blank line of intro text

    :param volume_title: e.g. "Volume 1 - The Beginning"
    :param volume_intro_text: multiline intro text for this volume
    :return: An HTML string for inclusion in `vol_<n>.xhtml`
    """
    container = html.Element("div")

    # Try to break the title into a main line and a subtitle line
    main_title, sub_title = _split_volume_title(volume_title)

    header = etree.SubElement(container, "div", {"class": "vol-header"})

    # Child order matters: top border, main title, flipped bottom border,
    # then the optional subtitle.
    header.append(_make_vol_border_img(flip=False))
    etree.SubElement(header, "h1", {"class": "vol-title-main"}).text = main_title
    header.append(_make_vol_border_img(flip=True))
    if sub_title:
        etree.SubElement(header, "h2", {"class": "vol-title-sub"}).text = sub_title

    # Intro text paragraphs
    if volume_intro_text:
        etree.SubElement(container, "p", {"class": "new-page-after"})

        text_box = etree.SubElement(container, "div", {"class": "vol-intro-text"})
        stripped_lines = (raw.strip() for raw in volume_intro_text.splitlines())
        # filter(None, ...) drops the lines that are empty after stripping
        for paragraph in filter(None, stripped_lines):
            etree.SubElement(text_box, "p").text = paragraph

    rendered: str = html.tostring(
        container,
        pretty_print=PRETTY_PRINT_FLAG,
        encoding="unicode",
    )
    return rendered
165
+
166
+
167
def _add_li(ul: etree._Element, label: str, value: str) -> None:
    """
    Append a `<li>` reading 'label: value' to `ul`.

    Nothing is added when `value` is empty.

    :param ul: Parent list element that receives the new item.
    :param label: Text shown before the colon.
    :param value: Text shown after the colon.
    """
    if not value:
        return
    item = etree.SubElement(ul, "li")
    item.text = f"{label}: {value}"
174
+
175
+
176
def _make_vol_border_img(flip: bool = False) -> html.HtmlElement:
    """
    Build a `<div>` wrapping the `volume_border.png` image.

    :param flip: When True, the wrapper gets the extra CSS class "flip"
                 in addition to "vol-border".
    :return: The wrapper `<div>` element with the `<img>` as its only child.
    """
    css_class = "vol-border flip" if flip else "vol-border"
    wrapper = html.Element("div", {"class": css_class})

    image_attrs = {
        "src": f"../{IMAGE_FOLDER}/volume_border.png",
        "alt": "",
    }
    etree.SubElement(wrapper, "img", image_attrs)
    return wrapper
196
+
197
+
198
+ def _split_volume_title(volume_title: str) -> tuple[str, str]:
199
+ """
200
+ Split volume title into two parts for better display.
201
+
202
+ :param volume_title: Original volume title string.
203
+ :return: Tuple of (line1, line2)
204
+ """
205
+ if " " in volume_title:
206
+ parts = volume_title.split(" ", 1)
207
+ elif "-" in volume_title:
208
+ parts = volume_title.split("-", 1)
209
+ else:
210
+ return volume_title, ""
211
+
212
+ return parts[0], parts[1]
@@ -17,18 +17,6 @@ Included utilities:
17
17
  - read_text_file / read_json_file / read_binary_file: load content from file
18
18
  """
19
19
 
20
- from .io import (
21
- load_blacklisted_words,
22
- load_text_resource,
23
- read_binary_file,
24
- read_json_file,
25
- read_text_file,
26
- save_as_json,
27
- save_as_txt,
28
- )
29
- from .normalize import normalize_txt_line_endings
30
- from .sanitize import sanitize_filename
31
-
32
20
  __all__ = [
33
21
  "sanitize_filename",
34
22
  "save_as_json",
@@ -36,7 +24,15 @@ __all__ = [
36
24
  "read_text_file",
37
25
  "read_json_file",
38
26
  "read_binary_file",
39
- "load_text_resource",
40
- "load_blacklisted_words",
41
27
  "normalize_txt_line_endings",
42
28
  ]
29
+
30
+ from .io import (
31
+ read_binary_file,
32
+ read_json_file,
33
+ read_text_file,
34
+ save_as_json,
35
+ save_as_txt,
36
+ )
37
+ from .normalize import normalize_txt_line_endings
38
+ from .sanitize import sanitize_filename
@@ -11,10 +11,17 @@ Includes:
11
11
  - Simple helpers for reading files with fallback and logging
12
12
  """
13
13
 
14
+ __all__ = [
15
+ "save_as_txt",
16
+ "save_as_json",
17
+ "read_text_file",
18
+ "read_json_file",
19
+ "read_binary_file",
20
+ ]
21
+
14
22
  import json
15
23
  import logging
16
24
  import tempfile
17
- from importlib.resources import files
18
25
  from pathlib import Path
19
26
  from typing import Any, Literal
20
27
 
@@ -42,12 +49,12 @@ def _get_non_conflicting_path(path: Path) -> Path:
42
49
  def _write_file(
43
50
  content: str | bytes | dict[Any, Any] | list[Any] | Any,
44
51
  filepath: str | Path,
45
- mode: str | None = None,
52
+ write_mode: str = "w",
46
53
  *,
47
54
  on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
48
55
  dump_json: bool = False,
49
56
  encoding: str = "utf-8",
50
- ) -> bool:
57
+ ) -> Path | None:
51
58
  """
52
59
  Write content to a file safely with optional atomic behavior
53
60
  and JSON serialization.
@@ -60,7 +67,7 @@ def _write_file(
60
67
  or 'rename'.
61
68
  :param dump_json: If True, serialize content as JSON.
62
69
  :param encoding: Text encoding for writing.
63
- :return: True if writing succeeds, False otherwise.
70
+ :return: Path if writing succeeds, None otherwise.
64
71
  """
65
72
  path = Path(filepath)
66
73
  path = path.with_name(sanitize_filename(path.name))
@@ -69,7 +76,7 @@ def _write_file(
69
76
  if path.exists():
70
77
  if on_exist == "skip":
71
78
  logger.debug("[file] '%s' exists, skipping", path)
72
- return False
79
+ return path
73
80
  if on_exist == "rename":
74
81
  path = _get_non_conflicting_path(path)
75
82
  logger.debug("[file] Renaming target to avoid conflict: %s", path)
@@ -104,10 +111,10 @@ def _write_file(
104
111
  tmp_path = Path(tmp.name)
105
112
  tmp_path.replace(path)
106
113
  logger.debug("[file] '%s' written successfully", path)
107
- return True
114
+ return path
108
115
  except Exception as exc:
109
116
  logger.warning("[file] Error writing %r: %s", path, exc)
110
- return False
117
+ return None
111
118
 
112
119
 
113
120
  def save_as_txt(
@@ -116,7 +123,7 @@ def save_as_txt(
116
123
  *,
117
124
  encoding: str = "utf-8",
118
125
  on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
119
- ) -> bool:
126
+ ) -> Path | None:
120
127
  """
121
128
  Save plain text content to the given file path.
122
129
 
@@ -124,12 +131,12 @@ def save_as_txt(
124
131
  :param filepath: Destination file path.
125
132
  :param encoding: Text encoding to use (default: 'utf-8').
126
133
  :param on_exist: How to handle existing files: 'overwrite', 'skip', or 'rename'.
127
- :return: True if successful, False otherwise.
134
+ :return: Path if writing succeeds, None otherwise.
128
135
  """
129
136
  return _write_file(
130
137
  content=content,
131
138
  filepath=filepath,
132
- mode="w",
139
+ write_mode="w",
133
140
  on_exist=on_exist,
134
141
  dump_json=False,
135
142
  encoding=encoding,
@@ -142,7 +149,7 @@ def save_as_json(
142
149
  *,
143
150
  encoding: str = "utf-8",
144
151
  on_exist: Literal["overwrite", "skip", "rename"] = "overwrite",
145
- ) -> bool:
152
+ ) -> Path | None:
146
153
  """
147
154
  Save JSON-serializable content to the given file path.
148
155
 
@@ -150,12 +157,12 @@ def save_as_json(
150
157
  :param filepath: Destination file path.
151
158
  :param encoding: Text encoding to use (default: 'utf-8').
152
159
  :param on_exist: How to handle existing files: 'overwrite', 'skip', or 'rename'.
153
- :return: True if successful, False otherwise.
160
+ :return: Path if writing succeeds, None otherwise.
154
161
  """
155
162
  return _write_file(
156
163
  content=content,
157
164
  filepath=filepath,
158
- mode="w",
165
+ write_mode="w",
159
166
  on_exist=on_exist,
160
167
  dump_json=True,
161
168
  encoding=encoding,
@@ -207,41 +214,3 @@ def read_binary_file(filepath: str | Path) -> bytes | None:
207
214
  except Exception as e:
208
215
  logger.warning("[file] Failed to read %r: %s", path, e)
209
216
  return None
210
-
211
-
212
- def load_text_resource(
213
- filename: str,
214
- package: str = "novel_downloader.resources.text",
215
- ) -> str:
216
- """
217
- Load and return the contents of a text resource.
218
-
219
- :param filename: Name of the text file (e.g. "blacklist.txt").
220
- :param package: Package path where resources live (default: text resources).
221
- For other resource types, point to the appropriate subpackage
222
- (e.g. "novel_downloader.resources.css").
223
- :return: File contents as a string.
224
- """
225
- resource_path = files(package).joinpath(filename)
226
- return resource_path.read_text(encoding="utf-8")
227
-
228
-
229
- def load_blacklisted_words() -> set[str]:
230
- """
231
- Convenience loader for the blacklist.txt in the text resources.
232
-
233
- :return: A set of non-empty, stripped lines from blacklist.txt.
234
- """
235
- text = load_text_resource("blacklist.txt")
236
- return {line.strip() for line in text.splitlines() if line.strip()}
237
-
238
-
239
- __all__ = [
240
- "save_as_txt",
241
- "save_as_json",
242
- "read_text_file",
243
- "read_json_file",
244
- "read_binary_file",
245
- "load_text_resource",
246
- "load_blacklisted_words",
247
- ]
@@ -9,6 +9,8 @@ across platforms or output formats.
9
9
  Currently includes line-ending normalization for .txt files.
10
10
  """
11
11
 
12
+ __all__ = ["normalize_txt_line_endings"]
13
+
12
14
  import logging
13
15
  from pathlib import Path
14
16
 
@@ -46,8 +48,6 @@ def normalize_txt_line_endings(folder_path: str | Path) -> None:
46
48
  return
47
49
 
48
50
 
49
- __all__ = ["normalize_txt_line_endings"]
50
-
51
51
  if __name__ == "__main__": # pragma: no cover
52
52
  import argparse
53
53
 
@@ -11,6 +11,8 @@ that replaces or removes illegal characters from filenames, trims
11
11
  lengths, and avoids reserved names on Windows systems.
12
12
  """
13
13
 
14
+ __all__ = ["sanitize_filename"]
15
+
14
16
  import logging
15
17
  import os
16
18
  import re
@@ -65,6 +67,3 @@ def sanitize_filename(filename: str, max_length: int | None = 255) -> str:
65
67
  cleaned = "_untitled"
66
68
  logger.debug("[file] Sanitized filename: %r -> %r", filename, cleaned)
67
69
  return cleaned
68
-
69
-
70
- __all__ = ["sanitize_filename"]
@@ -14,9 +14,9 @@ Supports:
14
14
  Exposes the selected OCR engine version via `FontOCR`.
15
15
  """
16
16
 
17
- # from .ocr_v1 import FontOCRV1 as FontOCR
18
- from .ocr_v2 import FontOCRV2 as FontOCR
19
-
20
- __version__ = "v2"
21
-
22
17
  __all__ = ["FontOCR"]
18
+ __version__ = "3.0"
19
+
20
+ # from .ocr_v1 import FontOCRV1 as FontOCR
21
+ # from .ocr_v2 import FontOCRV2 as FontOCR
22
+ from .ocr_v3 import FontOCRV3 as FontOCR
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.utils.hash_store
4
- ---------------------------------
3
+ novel_downloader.utils.fontocr.hash_store
4
+ -----------------------------------------
5
5
 
6
6
  Manage a small collection of image perceptual hashes and their labels.
7
7
  Supports loading/saving to .json or .npy, and basic CRUD + search.
@@ -15,10 +15,11 @@ from pathlib import Path
15
15
 
16
16
  from PIL import Image
17
17
 
18
- from .constants import HASH_STORE_FILE
18
+ from ..constants import DATA_DIR
19
19
  from .hash_utils import HASH_DISTANCE_THRESHOLD, fast_hamming_distance, phash
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
+ HASH_STORE_FILE = DATA_DIR / "image_hashes.json"
22
23
 
23
24
 
24
25
  class _BKNode:
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.utils.hash_utils
4
- ---------------------------------
3
+ novel_downloader.utils.fontocr.hash_utils
4
+ -----------------------------------------
5
5
 
6
6
  Utilities for image perceptual hashing and comparison.
7
7
 
@@ -24,8 +24,8 @@ from novel_downloader.utils.constants import (
24
24
  REC_CHAR_MODEL_FILES,
25
25
  REC_IMAGE_SHAPE_MAP,
26
26
  )
27
- from novel_downloader.utils.hash_store import img_hash_store
28
27
 
28
+ from .hash_store import img_hash_store
29
29
  from .model_loader import get_rec_chinese_char_model_dir
30
30
 
31
31
  logger = logging.getLogger(__name__)
@@ -301,3 +301,15 @@ class FontOCRV1:
301
301
  logger.error("[FontOCR] Failed to save fixed map: %s", e)
302
302
 
303
303
  return mapping_result
304
+
305
+ @staticmethod
306
+ def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
307
+ """
308
+ Replace each character in `text` using `font_map`,
309
+ leaving unmapped characters unchanged.
310
+
311
+ :param text: The input string, possibly containing obfuscated font chars.
312
+ :param font_map: A dict mapping obfuscated chars to real chars.
313
+ :return: The de-obfuscated text.
314
+ """
315
+ return "".join(font_map.get(ch, ch) for ch in text)
@@ -35,8 +35,8 @@ from novel_downloader.utils.constants import (
35
35
  REC_CHAR_MODEL_FILES,
36
36
  REC_IMAGE_SHAPE_MAP,
37
37
  )
38
- from novel_downloader.utils.hash_store import img_hash_store
39
38
 
39
+ from .hash_store import img_hash_store
40
40
  from .model_loader import (
41
41
  get_rec_char_vector_dir,
42
42
  get_rec_chinese_char_model_dir,
@@ -750,3 +750,15 @@ class FontOCRV2:
750
750
  logger.error("[FontOCR] Failed to save fixed map: %s", e)
751
751
 
752
752
  return mapping_result
753
+
754
+ @staticmethod
755
+ def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
756
+ """
757
+ Replace each character in `text` using `font_map`,
758
+ leaving unmapped characters unchanged.
759
+
760
+ :param text: The input string, possibly containing obfuscated font chars.
761
+ :param font_map: A dict mapping obfuscated chars to real chars.
762
+ :return: The de-obfuscated text.
763
+ """
764
+ return "".join(font_map.get(ch, ch) for ch in text)