novel-downloader 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. novel_downloader/__init__.py +14 -0
  2. novel_downloader/cli/__init__.py +14 -0
  3. novel_downloader/cli/clean.py +134 -0
  4. novel_downloader/cli/download.py +132 -0
  5. novel_downloader/cli/interactive.py +67 -0
  6. novel_downloader/cli/main.py +45 -0
  7. novel_downloader/cli/settings.py +177 -0
  8. novel_downloader/config/__init__.py +52 -0
  9. novel_downloader/config/adapter.py +153 -0
  10. novel_downloader/config/loader.py +177 -0
  11. novel_downloader/config/models.py +173 -0
  12. novel_downloader/config/site_rules.py +97 -0
  13. novel_downloader/core/__init__.py +25 -0
  14. novel_downloader/core/downloaders/__init__.py +22 -0
  15. novel_downloader/core/downloaders/base_async_downloader.py +157 -0
  16. novel_downloader/core/downloaders/base_downloader.py +187 -0
  17. novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
  18. novel_downloader/core/downloaders/common_downloader.py +191 -0
  19. novel_downloader/core/downloaders/qidian_downloader.py +208 -0
  20. novel_downloader/core/factory/__init__.py +33 -0
  21. novel_downloader/core/factory/downloader_factory.py +149 -0
  22. novel_downloader/core/factory/parser_factory.py +62 -0
  23. novel_downloader/core/factory/requester_factory.py +106 -0
  24. novel_downloader/core/factory/saver_factory.py +49 -0
  25. novel_downloader/core/interfaces/__init__.py +32 -0
  26. novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
  27. novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
  28. novel_downloader/core/interfaces/downloader_protocol.py +37 -0
  29. novel_downloader/core/interfaces/parser_protocol.py +40 -0
  30. novel_downloader/core/interfaces/requester_protocol.py +65 -0
  31. novel_downloader/core/interfaces/saver_protocol.py +61 -0
  32. novel_downloader/core/parsers/__init__.py +28 -0
  33. novel_downloader/core/parsers/base_parser.py +96 -0
  34. novel_downloader/core/parsers/common_parser/__init__.py +14 -0
  35. novel_downloader/core/parsers/common_parser/helper.py +321 -0
  36. novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
  37. novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
  38. novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
  39. novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
  40. novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
  41. novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
  42. novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
  43. novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
  44. novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
  45. novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
  46. novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
  47. novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
  48. novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
  49. novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
  50. novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
  51. novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
  52. novel_downloader/core/requesters/__init__.py +31 -0
  53. novel_downloader/core/requesters/base_async_session.py +297 -0
  54. novel_downloader/core/requesters/base_browser.py +210 -0
  55. novel_downloader/core/requesters/base_session.py +243 -0
  56. novel_downloader/core/requesters/common_requester/__init__.py +18 -0
  57. novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
  58. novel_downloader/core/requesters/common_requester/common_session.py +126 -0
  59. novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
  60. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
  61. novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
  62. novel_downloader/core/savers/__init__.py +20 -0
  63. novel_downloader/core/savers/base_saver.py +169 -0
  64. novel_downloader/core/savers/common_saver/__init__.py +13 -0
  65. novel_downloader/core/savers/common_saver/common_epub.py +232 -0
  66. novel_downloader/core/savers/common_saver/common_txt.py +176 -0
  67. novel_downloader/core/savers/common_saver/main_saver.py +86 -0
  68. novel_downloader/core/savers/epub_utils/__init__.py +27 -0
  69. novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
  70. novel_downloader/core/savers/epub_utils/initializer.py +98 -0
  71. novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
  72. novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
  73. novel_downloader/core/savers/qidian_saver.py +22 -0
  74. novel_downloader/locales/en.json +91 -0
  75. novel_downloader/locales/zh.json +91 -0
  76. novel_downloader/resources/config/rules.toml +196 -0
  77. novel_downloader/resources/config/settings.yaml +73 -0
  78. novel_downloader/resources/css_styles/main.css +104 -0
  79. novel_downloader/resources/css_styles/volume-intro.css +56 -0
  80. novel_downloader/resources/images/volume_border.png +0 -0
  81. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
  82. novel_downloader/resources/json/replace_word_map.json +4 -0
  83. novel_downloader/resources/text/blacklist.txt +22 -0
  84. novel_downloader/utils/__init__.py +0 -0
  85. novel_downloader/utils/cache.py +24 -0
  86. novel_downloader/utils/constants.py +158 -0
  87. novel_downloader/utils/crypto_utils.py +144 -0
  88. novel_downloader/utils/file_utils/__init__.py +43 -0
  89. novel_downloader/utils/file_utils/io.py +252 -0
  90. novel_downloader/utils/file_utils/normalize.py +68 -0
  91. novel_downloader/utils/file_utils/sanitize.py +77 -0
  92. novel_downloader/utils/fontocr/__init__.py +23 -0
  93. novel_downloader/utils/fontocr/ocr_v1.py +304 -0
  94. novel_downloader/utils/fontocr/ocr_v2.py +658 -0
  95. novel_downloader/utils/hash_store.py +288 -0
  96. novel_downloader/utils/hash_utils.py +103 -0
  97. novel_downloader/utils/i18n.py +41 -0
  98. novel_downloader/utils/logger.py +104 -0
  99. novel_downloader/utils/model_loader.py +72 -0
  100. novel_downloader/utils/network.py +287 -0
  101. novel_downloader/utils/state.py +156 -0
  102. novel_downloader/utils/text_utils/__init__.py +27 -0
  103. novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
  104. novel_downloader/utils/text_utils/diff_display.py +75 -0
  105. novel_downloader/utils/text_utils/font_mapping.py +31 -0
  106. novel_downloader/utils/text_utils/text_cleaning.py +57 -0
  107. novel_downloader/utils/time_utils/__init__.py +22 -0
  108. novel_downloader/utils/time_utils/datetime_utils.py +146 -0
  109. novel_downloader/utils/time_utils/sleep_utils.py +49 -0
  110. novel_downloader-1.1.0.dist-info/METADATA +157 -0
  111. novel_downloader-1.1.0.dist-info/RECORD +115 -0
  112. novel_downloader-1.1.0.dist-info/WHEEL +5 -0
  113. novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
  114. novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
  115. novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.savers.base_saver
5
+ ---------------------------------------
6
+
7
+ This module provides an abstract base class `BaseSaver` that defines the
8
+ common interface and reusable logic for saving assembled novel content
9
+ into various output formats.
10
+ """
11
+
12
+ import abc
13
+ import logging
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+ from typing import Any, Dict, Optional
17
+
18
+ from novel_downloader.config.models import SaverConfig
19
+ from novel_downloader.core.interfaces import SaverProtocol
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class SafeDict(Dict[str, Any]):
    """
    Dictionary for `str.format_map` that tolerates unknown placeholders.

    Looking up a key that is not present returns the key wrapped in
    braces (e.g. ``"{name}"``) instead of raising `KeyError`.
    """

    def __missing__(self, key: str) -> str:
        # Re-emit the placeholder so it stays visible in the formatted text.
        return f"{{{key}}}"
27
+
28
+
29
class BaseSaver(SaverProtocol, abc.ABC):
    """
    Abstract foundation for savers that export an assembled book.

    Subclasses must implement TXT export. EPUB, Markdown and PDF export
    are optional hooks that raise `NotImplementedError` unless overridden.
    """

    def __init__(self, config: SaverConfig):
        """
        Store the configuration and make sure the working directories exist.

        :param config: A SaverConfig object that defines
                       save paths, formats, and options.
        """
        self._config = config

        self._raw_data_dir = Path(config.raw_data_dir)
        self._output_dir = Path(config.output_dir)
        # Create both directories up front (raw data first, then output).
        for directory in (self._raw_data_dir, self._output_dir):
            directory.mkdir(parents=True, exist_ok=True)

        self._filename_template = config.filename_template

    def save(self, book_id: str) -> None:
        """
        Export the book in every format enabled in the configuration.

        Each format is attempted independently: an unimplemented or failing
        exporter is logged and the remaining formats are still processed.

        :param book_id: The book identifier (used for filename, lookup, etc.)
        """
        tag = "[Saver]"
        # (config flag, exporter) pairs, tried in this order.
        exporters = (
            ("make_txt", self.save_as_txt),
            ("make_epub", self.save_as_epub),
            ("make_md", self.save_as_md),
            ("make_pdf", self.save_as_pdf),
        )

        for option, export in exporters:
            # A flag that is absent from the config counts as disabled.
            if not getattr(self._config, option, False):
                continue

            try:
                logger.info(
                    "%s Attempting to save book_id '%s' as %s...",
                    tag,
                    book_id,
                    option,
                )
                export(book_id)
                logger.info("%s Successfully saved as %s.", tag, option)
            except NotImplementedError as exc:
                logger.warning(
                    "%s Save method for %s not implemented: %s",
                    tag,
                    option,
                    str(exc),
                )
            except Exception as exc:
                logger.error(
                    "%s Error while saving as %s: %s", tag, option, str(exc)
                )

    @abc.abstractmethod
    def save_as_txt(self, book_id: str) -> None:
        """
        Write the assembled book to a .txt file.

        Every concrete subclass must implement this method.

        :param book_id: The book identifier (used for filename, lookup, etc.)
        """
        ...

    def save_as_epub(self, book_id: str) -> None:
        """
        Optional hook: write the assembled book to an EPUB (.epub) file.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError("EPUB export not supported by this saver.")

    def save_as_md(self, book_id: str) -> None:
        """
        Optional hook: write the assembled book to a Markdown file.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError("Markdown export not supported by this saver.")

    def save_as_pdf(self, book_id: str) -> None:
        """
        Optional hook: write the assembled book to a PDF file.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError("PDF export not supported by this saver.")

    def get_filename(
        self,
        *,
        title: str,
        author: Optional[str] = None,
        ext: str = "txt",
        **extra_fields: str,
    ) -> str:
        """
        Build an output filename from the configured template.

        Placeholders in the template that have no matching field are left
        in the result unchanged. When the configuration enables
        ``append_timestamp``, a ``_YYYYMMDD_HHMMSS`` suffix (local time)
        is added before the extension.

        :param title: Book title (required).
        :param author: Author name (optional, treated as "" when missing).
        :param ext: File extension without the dot (e.g., "txt", "epub").
        :param extra_fields: Any additional fields used in the filename template.
        :return: Formatted filename with extension.
        """
        fields = SafeDict(
            {"title": title, "author": author or "", **extra_fields}
        )
        stem = self._filename_template.format_map(fields)

        if self._config.append_timestamp:
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            stem = f"{stem}_{stamp}"

        return f"{stem}.{ext}"

    @property
    def output_dir(self) -> Path:
        """Directory that exported files are written to."""
        return self._output_dir

    @property
    def raw_data_dir(self) -> Path:
        """Directory that holds the raw data."""
        return self._raw_data_dir

    @property
    def filename_template(self) -> str:
        """Template string used to build output filenames."""
        return self._filename_template
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.savers.common_saver
5
+ -----------------------------------------
6
+
7
+ This module provides the `CommonSaver` class for handling the saving process
8
+ of novels.
9
+ """
10
+
11
+ from .main_saver import CommonSaver
12
+
13
+ __all__ = ["CommonSaver"]
@@ -0,0 +1,232 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.savers.common_saver.common_epub
5
+ -----------------------------------------------------
6
+
7
+ Contains the logic for exporting novel content as a single `.epub` file.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import logging
14
+ from pathlib import Path
15
+ from typing import TYPE_CHECKING, List, Optional
16
+ from urllib.parse import unquote, urlparse
17
+
18
+ from ebooklib import epub
19
+
20
+ from novel_downloader.core.savers.epub_utils import (
21
+ chapter_txt_to_html,
22
+ create_css_items,
23
+ create_volume_intro,
24
+ generate_book_intro_html,
25
+ init_epub,
26
+ )
27
+ from novel_downloader.utils.constants import (
28
+ DEFAULT_IMAGE_SUFFIX,
29
+ EPUB_OPTIONS,
30
+ EPUB_TEXT_FOLDER,
31
+ )
32
+ from novel_downloader.utils.file_utils import sanitize_filename
33
+ from novel_downloader.utils.text_utils import clean_chapter_title
34
+
35
+ if TYPE_CHECKING:
36
+ from .main_saver import CommonSaver
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
# Sub-folders of a book's raw-data directory that may hold chapter JSON,
# listed in the order they are searched.
CHAPTER_FOLDERS: List[str] = [
    "chapters",
    "encrypted_chapters",
]


def _find_chapter_file(
    raw_base: Path,
    chapter_id: str,
) -> Optional[Path]:
    """
    Locate the JSON file of a chapter below `raw_base`.

    Each folder in CHAPTER_FOLDERS is checked in order for
    `<chapter_id>.json`; the first path that exists wins.

    :param raw_base: Directory that contains the chapter sub-folders.
    :param chapter_id: Chapter identifier used as the file stem.
    :return: Path of the first existing candidate, or None if none exists.
    """
    file_name = f"{chapter_id}.json"
    candidates = (raw_base / folder / file_name for folder in CHAPTER_FOLDERS)
    return next((path for path in candidates if path.exists()), None)
60
+
61
+
62
def _image_url_to_filename(url: str) -> str:
    """
    Derive an image filename from the path component of a URL.

    The last segment of the percent-decoded URL path is used as the
    filename. An empty segment falls back to "image", and a name without
    a suffix gets DEFAULT_IMAGE_SUFFIX appended.

    :param url: URL string
    :return: Filename string, or "" when `url` is empty
    """
    if not url:
        return ""

    # Query string and fragment are ignored; only the path is inspected.
    decoded_path = unquote(urlparse(url).path)
    name = Path(decoded_path).name or "image"

    return name if Path(name).suffix else name + DEFAULT_IMAGE_SUFFIX
84
+
85
+
86
def common_save_as_epub(
    saver: CommonSaver,
    book_id: str,
) -> None:
    """
    Export a single novel (identified by `book_id`) to an EPUB file.

    This function will:
      1. Load `book_info.json` for metadata.
      2. Generate introductory HTML and optionally include the cover image.
      3. Initialize the EPUB container.
      4. Iterate through volumes and chapters, convert each to XHTML.
      5. Assemble the spine, TOC, CSS and write out the final `.epub`.

    Failures are logged rather than raised: if `book_info.json` cannot be
    loaded or the EPUB cannot be written, the function logs an error and
    returns. Chapters whose file is missing or unparsable are skipped.

    :param saver: The saver instance, carrying config and path info.
    :param book_id: Identifier of the novel (used as subdirectory name).
    """
    TAG = "[saver]"
    site = saver.site
    config = saver._config
    # --- Paths & options ---
    raw_base = saver.raw_data_dir / site / book_id
    out_dir = saver.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # --- Load book_info.json ---
    info_path = raw_base / "book_info.json"
    try:
        info_text = info_path.read_text(encoding="utf-8")
        book_info = json.loads(info_text)
    except Exception as e:
        logger.error("%s Failed to load %s: %s", TAG, info_path, e)
        return

    book_name = book_info.get("book_name", book_id)
    logger.info("%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id)

    # --- Generate intro + cover ---
    intro_html = generate_book_intro_html(book_info)
    cover_path: Optional[Path] = None
    if config.include_cover:
        # The cover is looked up next to book_info.json under the filename
        # derived from its URL.
        cover_filename = _image_url_to_filename(book_info.get("cover_url", ""))
        if cover_filename:
            cover_path = raw_base / cover_filename

    # --- Initialize EPUB ---
    book, spine, toc_list = init_epub(
        book_info=book_info,
        book_id=book_id,
        intro_html=intro_html,
        book_cover_path=cover_path,
        include_toc=config.include_toc,
    )
    for css in create_css_items(
        include_main=True,
        include_volume=True,
    ):
        book.add_item(css)

    # --- Compile chapters ---
    volumes = book_info.get("volumes", [])
    for vol_index, vol in enumerate(volumes, start=1):
        # `or ""` also covers a volume_name stored as JSON null.
        raw_vol_name = (vol.get("volume_name") or "").strip()
        vol_name = clean_chapter_title(raw_vol_name) or f"Unknown Volume {vol_index}"
        logger.info("%s Processing volume %d: %s", TAG, vol_index, vol_name)

        # Volume intro page
        vol_intro = epub.EpubHtml(
            title=vol_name,
            file_name=f"{EPUB_TEXT_FOLDER}/volume_intro_{vol_index}.xhtml",
            lang="zh",
        )
        vol_intro.content = create_volume_intro(vol_name, vol.get("volume_intro", ""))
        vol_intro.add_link(
            href="../Styles/volume-intro.css",
            rel="stylesheet",
            type="text/css",
        )
        book.add_item(vol_intro)
        spine.append(vol_intro)

        # TOC entry for the volume; its chapters are collected below.
        section = epub.Section(vol_name, vol_intro.file_name)
        chapter_items: List[epub.EpubHtml] = []

        for chap in vol.get("chapters", []):
            chap_id = chap.get("chapterId")
            chap_title = chap.get("title", "")
            if not chap_id:
                logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
                continue

            json_path = _find_chapter_file(raw_base, chap_id)
            if json_path is None:
                logger.info(
                    "%s Missing chapter file: %s (%s), skipping.",
                    TAG,
                    chap_title,
                    chap_id,
                )
                continue

            try:
                data = json.loads(json_path.read_text(encoding="utf-8"))
                # Fall back to the chapter id when the stored title is empty.
                title = clean_chapter_title(data.get("title", "")) or chap_id
                chap_html = chapter_txt_to_html(
                    chapter_title=title,
                    chapter_text=data.get("content", ""),
                    author_say=data.get("author_say", ""),
                )
            except Exception as e:
                logger.error("%s Error parsing chapter %s: %s", TAG, json_path, e)
                continue

            chap_path = f"{EPUB_TEXT_FOLDER}/{chap_id}.xhtml"
            item = epub.EpubHtml(title=chap_title, file_name=chap_path, lang="zh")
            item.content = chap_html
            item.add_link(
                href="../Styles/main.css",
                rel="stylesheet",
                type="text/css",
            )
            book.add_item(item)
            spine.append(item)
            chapter_items.append(item)

        toc_list.append((section, chapter_items))

    # --- Finalize EPUB ---
    logger.info("%s Building TOC and spine...", TAG)
    book.toc = tuple(toc_list)
    book.spine = spine
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    out_name = saver.get_filename(
        title=book_name,
        author=book_info.get("author"),
        ext="epub",
    )
    out_path = out_dir / sanitize_filename(out_name)

    try:
        epub.write_epub(out_path, book, EPUB_OPTIONS)
        logger.info("%s EPUB successfully written to %s", TAG, out_path)
    except Exception as e:
        logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
@@ -0,0 +1,176 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.savers.common_saver.common_txt
5
+ ----------------------------------------------------
6
+
7
+ Contains the logic for exporting novel content as a single `.txt` file.
8
+
9
+ This module defines `common_save_as_txt` function, which assembles and formats
10
+ a novel based on metadata and chapter files found in the raw data directory.
11
+ It is intended to be used by `CommonSaver` as part of the save/export process.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ from pathlib import Path
19
+ from typing import TYPE_CHECKING, List, Optional
20
+
21
+ from novel_downloader.utils.file_utils import save_as_txt
22
+ from novel_downloader.utils.text_utils import clean_chapter_title, format_chapter
23
+
24
+ if TYPE_CHECKING:
25
+ from .main_saver import CommonSaver
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
# Sub-folders of a book's raw-data directory that may hold chapter JSON,
# listed in the order they are searched.
CHAPTER_FOLDERS: List[str] = [
    "chapters",
    "encrypted_chapters",
]


def _find_chapter_file(
    raw_base: Path,
    chapter_id: str,
) -> Optional[Path]:
    """
    Locate the JSON file of a chapter below `raw_base`.

    Each folder in CHAPTER_FOLDERS is checked in order for
    `<chapter_id>.json`; the first path that exists wins.

    :param raw_base: Directory that contains the chapter sub-folders.
    :param chapter_id: Chapter identifier used as the file stem.
    :return: Path of the first existing candidate, or None if none exists.
    """
    wanted = f"{chapter_id}.json"
    for sub_dir in CHAPTER_FOLDERS:
        path = raw_base / sub_dir / wanted
        if not path.exists():
            continue
        return path
    return None
49
+
50
+
51
def common_save_as_txt(
    saver: CommonSaver,
    book_id: str,
) -> None:
    """
    Merge all chapter JSON files of a novel into one complete TXT file
    and save it to the saver's output directory.

    Chapter files are expected to be named `{chapterId}.json`.

    Processing steps:
      1. Load the book information from `book_info.json` (book name,
         author, summary and the volume/chapter list).
      2. Walk through the volumes: append a volume header (when the volume
         has a name), then the title and content of each chapter, while
         remembering the last chapter title for the "原文截至" header line.
      3. Join the book metadata header with all chapter content to form
         the final text.
      4. Save the result to the output directory under the filename
         produced by `saver.get_filename`.

    Failures are logged rather than raised: if `book_info.json` cannot be
    loaded or the file cannot be saved, the function logs an error and
    returns. Chapters whose file is missing or unreadable are skipped.

    :param saver: The saver instance, carrying config and path info.
    :param book_id: Identifier of the novel (used as subdirectory name).
    """
    TAG = "[saver]"
    site = saver.site
    # --- Paths & options ---
    raw_base = saver.raw_data_dir / site / book_id
    out_dir = saver.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # --- Load book_info.json ---
    info_path = raw_base / "book_info.json"
    try:
        info_text = info_path.read_text(encoding="utf-8")
        book_info = json.loads(info_text)
    except Exception as e:
        logger.error("%s Failed to load %s: %s", TAG, info_path, e)
        return

    # --- Compile chapters ---
    parts: List[str] = []
    latest_chapter: str = ""
    volumes = book_info.get("volumes", [])

    for vol in volumes:
        # `or ""` also covers a volume_name stored as JSON null.
        vol_name = (vol.get("volume_name") or "").strip()
        vol_name = clean_chapter_title(vol_name)
        if vol_name:
            volume_header = f"\n\n{'=' * 6} {vol_name} {'=' * 6}\n\n"
            parts.append(volume_header)
            logger.info("%s Processing volume: %s", TAG, vol_name)
        for chap in vol.get("chapters", []):
            chap_id = chap.get("chapterId")
            chap_title = chap.get("title", "")
            if not chap_id:
                logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
                continue

            # Find the JSON file in one of the known subfolders
            json_path = _find_chapter_file(raw_base, chap_id)
            if json_path is None:
                logger.info(
                    "%s Missing chapter file in: %s (%s), skipping.",
                    TAG,
                    chap_title,
                    chap_id,
                )
                continue

            try:
                chapter_data = json.loads(json_path.read_text(encoding="utf-8"))
            except Exception as e:
                logger.error("%s Error reading %s: %s", TAG, json_path, e)
                continue

            # Extract structured fields; the title from book_info is the
            # fallback when the chapter file has none.
            title = chapter_data.get("title", chap_title).strip()
            content = chapter_data.get("content", "").strip()
            author_say = chapter_data.get("author_say", "").strip()
            clean_title = clean_chapter_title(title)

            parts.append(format_chapter(clean_title, content, author_say))
            latest_chapter = clean_title

    # --- Build header ---
    name = book_info.get("book_name")
    author = book_info.get("author")
    words = book_info.get("word_count")
    updated = book_info.get("update_time")
    summary = book_info.get("summary")

    header_lines = []

    if name:
        header_lines.append(f"书名: {name}")

    if author:
        header_lines.append(f"作者: {author}")

    if words:
        header_lines.append(f"总字数: {words}")

    if updated:
        header_lines.append(f"更新日期: {updated}")

    header_lines.append(f"原文截至: {latest_chapter}")

    if summary:
        header_lines.append("内容简介:")
        header_lines.append(summary)

    header_lines.append("")
    header_lines.append("-" * 10)
    header_lines.append("")

    header = "\n".join(header_lines)

    final_text = header + "\n\n" + "\n\n".join(parts).strip()

    # --- Determine output file path ---
    # Fall back to the book id when the metadata has no book name, so the
    # file is not named after the literal string "None".
    out_name = saver.get_filename(title=name or book_id, author=author, ext="txt")
    out_path = out_dir / out_name

    # --- Save final text ---
    try:
        save_as_txt(content=final_text, filepath=out_path)
        logger.info("%s Novel saved to: %s", TAG, out_path)
    except Exception as e:
        logger.error("%s Failed to save file: %s", TAG, e)
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.savers.common_saver.main_saver
5
+ ----------------------------------------------------
6
+
7
+ This module implements the `CommonSaver` class, a concrete saver that is
8
+ parameterized by a site identifier. It delegates to helper functions that
9
+ compile, structure, and export novel content as plain text (and, when the
10
+ EPUB helper can be imported, as EPUB) from the stored metadata and chapter files.
11
+ """
12
+
13
+ from novel_downloader.config.models import SaverConfig
14
+
15
+ from ..base_saver import BaseSaver
16
+ from .common_txt import common_save_as_txt
17
+
18
+
19
class CommonSaver(BaseSaver):
    """
    CommonSaver is a saver that processes and exports novels.

    It extends the BaseSaver interface, remembers which site the raw data
    belongs to, and delegates the actual export of full novels to
    `common_save_as_txt` (.txt) and `common_save_as_epub` (.epub).
    """

    def __init__(self, config: SaverConfig, site: str):
        """
        Initialize the common saver with site information.

        :param config: A SaverConfig object that defines
                       save paths, formats, and options.
        :param site: Identifier for the site the saver is handling.
        """
        super().__init__(config)
        self._site = site

    def save_as_txt(self, book_id: str) -> None:
        """
        Compile and save a complete novel as a single .txt file.

        Processing steps:
          1. Load book metadata from `book_info.json`, including title,
             author, summary, and chapter structure.
          2. Iterate through all volumes and chapters, appending each
             volume/chapter title and content.
          3. Combine metadata and content into a final formatted text.
          4. Save the final result to the output directory using the
             configured filename template.

        :param book_id: The book identifier (used to locate raw data)
        """
        return common_save_as_txt(self, book_id)

    def save_as_epub(self, book_id: str) -> None:
        """
        Persist the assembled book as a EPUB (.epub) file.

        The EPUB helper module is imported lazily so that the saver can
        still be used for TXT export when that import fails.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the EPUB helper module cannot be
                                     imported (e.g. 'ebooklib' is missing).
        """
        try:
            from .common_epub import common_save_as_epub
        except ImportError as err:
            # Chain the original error so the real import failure stays
            # visible in the traceback.
            raise NotImplementedError(
                "EPUB export not supported. Please install 'ebooklib'"
            ) from err

        return common_save_as_epub(self, book_id)

    @property
    def site(self) -> str:
        """
        Get the site identifier.

        :return: The site string.
        """
        return self._site

    @site.setter
    def site(self, value: str) -> None:
        """
        Set the site identifier.

        :param value: New site string to set.
        """
        self._site = value
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.savers.epub_utils
5
+ ---------------------------------------
6
+
7
+ This package provides utility functions for constructing EPUB files,
8
+ including:
9
+
10
+ - CSS inclusion (create_css_items)
11
+ - EPUB book initialization (init_epub)
12
+ - Chapter text-to-HTML conversion (chapter_txt_to_html)
13
+ - Volume intro HTML generation (create_volume_intro)
14
+ """
15
+
16
+ from .css_builder import create_css_items
17
+ from .initializer import init_epub
18
+ from .text_to_html import chapter_txt_to_html, generate_book_intro_html
19
+ from .volume_intro import create_volume_intro
20
+
21
+ __all__ = [
22
+ "create_css_items",
23
+ "init_epub",
24
+ "chapter_txt_to_html",
25
+ "create_volume_intro",
26
+ "generate_book_intro_html",
27
+ ]