novel-downloader 1.1.1 (novel_downloader-1.1.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +14 -0
- novel_downloader/cli/__init__.py +14 -0
- novel_downloader/cli/clean.py +134 -0
- novel_downloader/cli/download.py +98 -0
- novel_downloader/cli/interactive.py +67 -0
- novel_downloader/cli/main.py +45 -0
- novel_downloader/cli/settings.py +177 -0
- novel_downloader/config/__init__.py +52 -0
- novel_downloader/config/adapter.py +150 -0
- novel_downloader/config/loader.py +177 -0
- novel_downloader/config/models.py +170 -0
- novel_downloader/config/site_rules.py +97 -0
- novel_downloader/core/__init__.py +25 -0
- novel_downloader/core/downloaders/__init__.py +20 -0
- novel_downloader/core/downloaders/base_downloader.py +187 -0
- novel_downloader/core/downloaders/common_downloader.py +192 -0
- novel_downloader/core/downloaders/qidian_downloader.py +208 -0
- novel_downloader/core/factory/__init__.py +21 -0
- novel_downloader/core/factory/downloader_factory.py +62 -0
- novel_downloader/core/factory/parser_factory.py +62 -0
- novel_downloader/core/factory/requester_factory.py +62 -0
- novel_downloader/core/factory/saver_factory.py +49 -0
- novel_downloader/core/interfaces/__init__.py +28 -0
- novel_downloader/core/interfaces/downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/parser_protocol.py +40 -0
- novel_downloader/core/interfaces/requester_protocol.py +65 -0
- novel_downloader/core/interfaces/saver_protocol.py +61 -0
- novel_downloader/core/parsers/__init__.py +28 -0
- novel_downloader/core/parsers/base_parser.py +96 -0
- novel_downloader/core/parsers/common_parser/__init__.py +14 -0
- novel_downloader/core/parsers/common_parser/helper.py +321 -0
- novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
- novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
- novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
- novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
- novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
- novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
- novel_downloader/core/requesters/__init__.py +27 -0
- novel_downloader/core/requesters/base_browser.py +210 -0
- novel_downloader/core/requesters/base_session.py +243 -0
- novel_downloader/core/requesters/common_requester/__init__.py +14 -0
- novel_downloader/core/requesters/common_requester/common_session.py +126 -0
- novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
- novel_downloader/core/savers/__init__.py +20 -0
- novel_downloader/core/savers/base_saver.py +169 -0
- novel_downloader/core/savers/common_saver/__init__.py +13 -0
- novel_downloader/core/savers/common_saver/common_epub.py +232 -0
- novel_downloader/core/savers/common_saver/common_txt.py +176 -0
- novel_downloader/core/savers/common_saver/main_saver.py +86 -0
- novel_downloader/core/savers/epub_utils/__init__.py +27 -0
- novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
- novel_downloader/core/savers/epub_utils/initializer.py +98 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
- novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
- novel_downloader/core/savers/qidian_saver.py +22 -0
- novel_downloader/locales/en.json +91 -0
- novel_downloader/locales/zh.json +91 -0
- novel_downloader/resources/config/rules.toml +196 -0
- novel_downloader/resources/config/settings.yaml +70 -0
- novel_downloader/resources/css_styles/main.css +104 -0
- novel_downloader/resources/css_styles/volume-intro.css +56 -0
- novel_downloader/resources/images/volume_border.png +0 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
- novel_downloader/resources/json/replace_word_map.json +4 -0
- novel_downloader/resources/text/blacklist.txt +22 -0
- novel_downloader/utils/__init__.py +0 -0
- novel_downloader/utils/cache.py +24 -0
- novel_downloader/utils/constants.py +158 -0
- novel_downloader/utils/crypto_utils.py +144 -0
- novel_downloader/utils/file_utils/__init__.py +43 -0
- novel_downloader/utils/file_utils/io.py +252 -0
- novel_downloader/utils/file_utils/normalize.py +68 -0
- novel_downloader/utils/file_utils/sanitize.py +77 -0
- novel_downloader/utils/fontocr/__init__.py +23 -0
- novel_downloader/utils/fontocr/ocr_v1.py +304 -0
- novel_downloader/utils/fontocr/ocr_v2.py +658 -0
- novel_downloader/utils/hash_store.py +288 -0
- novel_downloader/utils/hash_utils.py +103 -0
- novel_downloader/utils/i18n.py +41 -0
- novel_downloader/utils/logger.py +104 -0
- novel_downloader/utils/model_loader.py +72 -0
- novel_downloader/utils/network.py +287 -0
- novel_downloader/utils/state.py +156 -0
- novel_downloader/utils/text_utils/__init__.py +27 -0
- novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
- novel_downloader/utils/text_utils/diff_display.py +75 -0
- novel_downloader/utils/text_utils/font_mapping.py +31 -0
- novel_downloader/utils/text_utils/text_cleaning.py +57 -0
- novel_downloader/utils/time_utils/__init__.py +22 -0
- novel_downloader/utils/time_utils/datetime_utils.py +146 -0
- novel_downloader/utils/time_utils/sleep_utils.py +49 -0
- novel_downloader-1.1.1.dist-info/METADATA +137 -0
- novel_downloader-1.1.1.dist-info/RECORD +109 -0
- novel_downloader-1.1.1.dist-info/WHEEL +5 -0
- novel_downloader-1.1.1.dist-info/entry_points.txt +2 -0
- novel_downloader-1.1.1.dist-info/licenses/LICENSE +21 -0
- novel_downloader-1.1.1.dist-info/top_level.txt +1 -0
novel_downloader/core/downloaders/base_downloader.py
@@ -0,0 +1,187 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
novel_downloader.core.downloaders.base_downloader
-------------------------------------------------

Defines the abstract base class `BaseDownloader`, which provides a
common interface and reusable logic for all downloader implementations.
"""

import abc
import logging
from pathlib import Path
from typing import List

from novel_downloader.config import DownloaderConfig
from novel_downloader.core.interfaces import (
    DownloaderProtocol,
    ParserProtocol,
    RequesterProtocol,
    SaverProtocol,
)

logger = logging.getLogger(__name__)


class BaseDownloader(DownloaderProtocol, abc.ABC):
    """
    Abstract downloader that defines the initialization interface
    and the general batch download flow.

    Subclasses must implement the logic for downloading a single book.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Initialize the downloader with its components.

        :param requester: Object implementing RequesterProtocol, used to fetch raw data.
        :param parser: Object implementing ParserProtocol, used to parse page content.
        :param saver: Object implementing SaverProtocol, used to save final output.
        :param config: Downloader configuration object.
        """
        self._requester = requester
        self._parser = parser
        self._saver = saver
        self._config = config
        self._raw_data_dir = Path(config.raw_data_dir)
        self._cache_dir = Path(config.cache_dir)
        self._raw_data_dir.mkdir(parents=True, exist_ok=True)
        self._cache_dir.mkdir(parents=True, exist_ok=True)

    @abc.abstractmethod
    def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Subclasses must implement this method.

        :param book_id: The identifier of the book to download.
        """
        ...

    def download(self, book_ids: List[str]) -> None:
        """
        The general batch download process:
        1. Iterate over all book IDs
        2. For each ID, call `download_one()`

        :param book_ids: A list of book identifiers to download.
        """
        for idx, book_id in enumerate(book_ids, start=1):
            try:
                logger.debug(
                    "[downloader] Starting download for book_id: %s (%s/%s)",
                    book_id,
                    idx,
                    len(book_ids),
                )
                self.download_one(book_id)
            except Exception as e:
                self._handle_download_exception(book_id, e)

    def before_download(self, book_id: str) -> None:
        """
        Optional hook called before downloading each book.

        Subclasses can override this method to perform pre-download setup.

        :param book_id: The book ID about to be processed.
        """
        pass

    def _handle_download_exception(self, book_id: str, error: Exception) -> None:
        """
        Handle download errors in a consistent way.

        This method can be overridden or extended to implement retry logic, etc.

        :param book_id: The ID of the book that failed.
        :param error: The exception raised during download.
        """
        logger.warning("[downloader] Failed to download %s: %s", book_id, error)

    @property
    def requester(self) -> RequesterProtocol:
        """
        Access the current requester.

        :return: The internal requester instance.
        """
        return self._requester

    @property
    def parser(self) -> ParserProtocol:
        """
        Access the current parser.

        :return: The internal parser instance.
        """
        return self._parser

    @property
    def saver(self) -> SaverProtocol:
        """
        Access the current saver.

        :return: The internal saver instance.
        """
        return self._saver

    @property
    def config(self) -> DownloaderConfig:
        """
        Access the downloader configuration.

        :return: The internal DownloaderConfig object.
        """
        return self._config

    @property
    def raw_data_dir(self) -> Path:
        """
        Access the root directory for storing raw downloaded data.

        :return: Path to the raw data directory.
        """
        return self._raw_data_dir

    @property
    def cache_dir(self) -> Path:
        """
        Access the directory used for temporary caching during download.

        :return: Path to the cache directory.
        """
        return self._cache_dir

    def set_requester(self, requester: RequesterProtocol) -> None:
        """
        Replace the requester instance with a new one.

        :param requester: The new requester to be used.
        """
        self._requester = requester

    def set_parser(self, parser: ParserProtocol) -> None:
        """
        Replace the parser instance with a new one.

        :param parser: The new parser to be used.
        """
        self._parser = parser

    def set_saver(self, saver: SaverProtocol) -> None:
        """
        Replace the saver instance with a new one.

        :param saver: The new saver to be used.
        """
        self._saver = saver
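The base class leaves only `download_one` abstract; component wiring, directory creation, the batch loop, and error handling are inherited. As a rough illustration, a minimal concrete subclass might look like the sketch below. `ExampleDownloader` and the "example" folder name are hypothetical; the requester, parser, and saver calls mirror the ones used by the concrete downloaders later in this diff.

import json

from novel_downloader.core.downloaders.base_downloader import BaseDownloader


class ExampleDownloader(BaseDownloader):
    """Hypothetical subclass: only download_one has to be supplied."""

    def download_one(self, book_id: str) -> None:
        # Fetch and parse the book info page, honouring the configured request interval.
        info_html = self.requester.get_book_info(book_id, self.config.request_interval)
        book_info = self.parser.parse_book_info(info_html)

        # Persist the parsed metadata under the raw data directory created by the base class.
        out_path = self.raw_data_dir / "example" / book_id / "book_info.json"
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(book_info, ensure_ascii=False, indent=2), encoding="utf-8")

        # Delegate final TXT/EPUB assembly to the saver.
        self.saver.save(book_id)

Calling `ExampleDownloader(requester, parser, saver, config).download(book_ids)` then runs the inherited batch loop, which logs and swallows per-book failures via `_handle_download_exception`.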
novel_downloader/core/downloaders/common_downloader.py
@@ -0,0 +1,192 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
novel_downloader.core.downloaders.common_downloader
----------------------------------------------------

This module defines `CommonDownloader`, a generic downloader implementation
used for sites that have no dedicated downloader class and are instead
described by the configurable site rules.
"""

import json
import logging
from typing import Any, Dict

from novel_downloader.config import DownloaderConfig
from novel_downloader.core.interfaces import (
    ParserProtocol,
    RequesterProtocol,
    SaverProtocol,
)
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
from novel_downloader.utils.network import download_image_as_bytes
from novel_downloader.utils.time_utils import calculate_time_difference

from .base_downloader import BaseDownloader

logger = logging.getLogger(__name__)


class CommonDownloader(BaseDownloader):
    """
    Specialized downloader for common novels.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Initialize the common novel downloader with site information.

        :param requester: Object implementing RequesterProtocol, used to fetch raw data.
        :param parser: Object implementing ParserProtocol, used to parse page content.
        :param saver: Object implementing SaverProtocol, used to save final output.
        :param config: Downloader configuration object.
        :param site: Identifier for the site the downloader is targeting.
        """
        super().__init__(requester, parser, saver, config)
        self._site = site

    def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        :param book_id: The identifier of the book to download.
        """
        TAG = "[Downloader]"
        save_html = self.config.save_html
        skip_existing = self.config.skip_existing
        site = self.site
        wait_time = self.config.request_interval

        raw_base = self.raw_data_dir / site / book_id
        cache_base = self.cache_dir / site / book_id
        info_path = raw_base / "book_info.json"
        chapter_dir = raw_base / "chapters"
        if save_html:
            chapters_html_dir = cache_base / "html"

        raw_base.mkdir(parents=True, exist_ok=True)
        chapter_dir.mkdir(parents=True, exist_ok=True)

        book_info: Dict[str, Any]

        try:
            if not info_path.exists():
                raise FileNotFoundError
            book_info = json.loads(info_path.read_text(encoding="utf-8"))
            days, hrs, mins, secs = calculate_time_difference(
                book_info.get("update_time", ""), "UTC+8"
            )
            logger.info(
                "%s Last updated %dd %dh %dm %ds ago", TAG, days, hrs, mins, secs
            )
            if days > 1:
                raise FileNotFoundError  # trigger re-fetch
        except Exception:
            info_html = self.requester.get_book_info(book_id, wait_time)
            if save_html:
                info_html_path = chapters_html_dir / "info.html"
                save_as_txt(info_html, info_html_path)
            book_info = self.parser.parse_book_info(info_html)
            if (
                book_info.get("book_name", "") != "未找到书名"
                and book_info.get("update_time", "") != "未找到更新时间"
            ):
                save_as_json(book_info, info_path)

        # download cover
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            cover_bytes = download_image_as_bytes(cover_url, raw_base)
            if not cover_bytes:
                logger.warning("%s Failed to download cover: %s", TAG, cover_url)

        # enqueue chapters
        for vol in book_info.get("volumes", []):
            vol_name = vol.get("volume_name", "")
            logger.info("%s Enqueuing volume: %s", TAG, vol_name)

            for chap in vol.get("chapters", []):
                cid = chap.get("chapterId")
                if not cid:
                    logger.warning("%s Skipping chapter without chapterId", TAG)
                    continue

                chap_path = chapter_dir / f"{cid}.json"
                if chap_path.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s",
                        TAG,
                        cid,
                    )
                    continue

                chap_title = chap.get("title", "")
                logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
                try:
                    chap_html = self.requester.get_book_chapter(book_id, cid, wait_time)

                    if save_html:
                        html_path = chapters_html_dir / f"{cid}.html"
                        save_as_txt(chap_html, html_path, on_exist="skip")
                        logger.debug(
                            "%s Saved raw HTML for chapter %s to %s",
                            TAG,
                            cid,
                            html_path,
                        )

                    chap_json = self.parser.parse_chapter(chap_html, cid)
                    if not chap_json:
                        logger.warning(
                            "%s Parsed chapter json is empty, skipping: %s (%s)",
                            TAG,
                            chap_title,
                            cid,
                        )
                        continue
                except Exception as e:
                    logger.warning(
                        "%s Error while processing chapter %s (%s): %s",
                        TAG,
                        chap_title,
                        cid,
                        str(e),
                    )
                    continue

                save_as_json(chap_json, chap_path)
                logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)

        self.saver.save(book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return

    @property
    def site(self) -> str:
        """
        Get the site identifier.

        :return: The site string.
        """
        return self._site

    @site.setter
    def site(self, value: str) -> None:
        """
        Set the site identifier.

        :param value: New site string to set.
        """
        self._site = value
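`download_one` above is driven entirely by the dict returned from `parse_book_info`. Judging from the keys it reads, the expected shape is roughly the following; the field values are placeholders for illustration, not data taken from the package:

# Illustrative shape of the book_info dict consumed by download_one;
# only the keys accessed above are shown, and all values are placeholders.
book_info = {
    "book_name": "Example Title",          # parse failures yield the "未找到书名" sentinel instead
    "update_time": "2024-01-01 00:00:00",  # passed to calculate_time_difference(..., "UTC+8")
    "cover_url": "https://example.com/cover.jpg",
    "volumes": [
        {
            "volume_name": "Volume 1",
            "chapters": [
                {"chapterId": "123", "title": "Chapter 1"},
            ],
        },
    ],
}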
novel_downloader/core/downloaders/qidian_downloader.py
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
novel_downloader.core.downloaders.qidian_downloader
---------------------------------------------------

This module defines `QidianDownloader`, a platform-specific downloader
implementation for retrieving novels from Qidian (起点中文网).
"""

import json
import logging
from typing import Any, Dict

from novel_downloader.config import DownloaderConfig
from novel_downloader.core.interfaces import (
    ParserProtocol,
    RequesterProtocol,
    SaverProtocol,
)
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
from novel_downloader.utils.network import download_image_as_bytes
from novel_downloader.utils.state import state_mgr
from novel_downloader.utils.time_utils import calculate_time_difference

from .base_downloader import BaseDownloader

logger = logging.getLogger(__name__)


class QidianDownloader(BaseDownloader):
    """
    Specialized downloader for Qidian novels.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        super().__init__(requester, parser, saver, config)

        self._site_key = "qidian"
        self._is_logged_in = self._handle_login()
        state_mgr.set_manual_login_flag(self._site_key, not self._is_logged_in)

    def _handle_login(self) -> bool:
        """
        Perform login with automatic fallback to manual:

        1. If manual_flag is False, try automatic login:
           - On success, return True immediately.
        2. Always attempt manual login if manual_flag is True.
        3. Return True if manual login succeeds, False otherwise.
        """
        manual_flag = state_mgr.get_manual_login_flag(self._site_key)

        # First try automatic login
        if not manual_flag:
            if self._requester.login(manual_login=False):
                return True

        # try manual login
        return self._requester.login(manual_login=True)

    def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        :param book_id: The identifier of the book to download.
        """
        if not self._is_logged_in:
            logger.warning(
                f"[{self._site_key}] login failed, skipping download of {book_id}"
            )
            return

        TAG = "[Downloader]"
        save_html = self.config.save_html
        skip_existing = self.config.skip_existing
        wait_time = self.config.request_interval

        raw_base = self.raw_data_dir / "qidian" / book_id
        cache_base = self.cache_dir / "qidian" / book_id
        info_path = raw_base / "book_info.json"
        chapter_dir = raw_base / "chapters"
        encrypted_chapter_dir = raw_base / "encrypted_chapters"
        if save_html:
            chapters_html_dir = cache_base / "html"

        raw_base.mkdir(parents=True, exist_ok=True)
        chapter_dir.mkdir(parents=True, exist_ok=True)
        encrypted_chapter_dir.mkdir(parents=True, exist_ok=True)

        book_info: Dict[str, Any]

        try:
            if not info_path.exists():
                raise FileNotFoundError
            book_info = json.loads(info_path.read_text(encoding="utf-8"))
            days, hrs, mins, secs = calculate_time_difference(
                book_info.get("update_time", ""), "UTC+8"
            )
            logger.info(
                "%s Last updated %dd %dh %dm %ds ago", TAG, days, hrs, mins, secs
            )
            if days > 1:
                raise FileNotFoundError  # trigger re-fetch
        except Exception:
            info_html = self.requester.get_book_info(book_id, wait_time)
            if save_html:
                info_html_path = chapters_html_dir / "info.html"
                save_as_txt(info_html, info_html_path)
            book_info = self.parser.parse_book_info(info_html)
            if (
                book_info.get("book_name", "") != "未找到书名"
                and book_info.get("update_time", "") != "未找到更新时间"
            ):
                save_as_json(book_info, info_path)

        # download cover
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            cover_bytes = download_image_as_bytes(cover_url, raw_base)
            if not cover_bytes:
                logger.warning("%s Failed to download cover: %s", TAG, cover_url)

        # enqueue chapters
        for vol in book_info.get("volumes", []):
            vol_name = vol.get("volume_name", "")
            logger.info("%s Enqueuing volume: %s", TAG, vol_name)

            for chap in vol.get("chapters", []):
                cid = chap.get("chapterId")
                if not cid:
                    logger.warning("%s Skipping chapter without chapterId", TAG)
                    continue

                chap_path = chapter_dir / f"{cid}.json"

                if chap_path.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s",
                        TAG,
                        cid,
                    )
                    continue

                chap_title = chap.get("title", "")
                logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
                chap_html = self.requester.get_book_chapter(book_id, cid, wait_time)

                is_encrypted = self.parser.is_encrypted(chap_html)  # type: ignore[attr-defined]

                folder = encrypted_chapter_dir if is_encrypted else chapter_dir
                chap_path = folder / f"{cid}.json"

                if chap_path.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s",
                        TAG,
                        cid,
                    )
                    continue

                if save_html and not is_vip(chap_html):
                    folder = chapters_html_dir / (
                        "html_encrypted" if is_encrypted else "html_plain"
                    )
                    html_path = folder / f"{cid}.html"
                    save_as_txt(chap_html, html_path, on_exist="skip")
                    logger.debug(
                        "%s Saved raw HTML for chapter %s to %s", TAG, cid, html_path
                    )

                chap_json = self.parser.parse_chapter(chap_html, cid)
                if not chap_json:
                    logger.warning(
                        "%s Parsed chapter json is empty, skipping: %s (%s)",
                        TAG,
                        chap_title,
                        cid,
                    )
                    continue

                save_as_json(chap_json, chap_path)
                logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)

        self.saver.save(book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return


def is_vip(html_str: str) -> bool:
    """
    Return True if page indicates VIP-only content.

    :param html_str: Raw HTML string.
    """
    markers = ["这是VIP章节", "需要订阅", "订阅后才能阅读"]
    return any(m in html_str for m in markers)
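Compared with `CommonDownloader`, the per-chapter branch above adds two routing decisions: encrypted chapters are stored separately from plain ones, and raw HTML is only cached when the page is not VIP-gated. Condensed into a standalone helper (hypothetical, not part of the package), the routing amounts to:

from pathlib import Path
from typing import Optional, Tuple


def chapter_targets(
    raw_base: Path,
    cache_base: Path,
    cid: str,
    is_encrypted: bool,
    vip: bool,
    save_html: bool,
) -> Tuple[Path, Optional[Path]]:
    # JSON output: encrypted chapters go to "encrypted_chapters", plain ones to "chapters".
    json_path = raw_base / ("encrypted_chapters" if is_encrypted else "chapters") / f"{cid}.json"

    # Raw HTML is cached only when save_html is enabled and the page is not VIP-only.
    html_path = None
    if save_html and not vip:
        html_dir = cache_base / "html" / ("html_encrypted" if is_encrypted else "html_plain")
        html_path = html_dir / f"{cid}.html"

    return json_path, html_path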
novel_downloader/core/factory/__init__.py
@@ -0,0 +1,21 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
novel_downloader.core.factory
-----------------------------

This package provides factory methods for dynamically retrieving components
based on runtime parameters such as site name or content type.
"""

from .downloader_factory import get_downloader
from .parser_factory import get_parser
from .requester_factory import get_requester
from .saver_factory import get_saver

__all__ = [
    "get_downloader",
    "get_parser",
    "get_requester",
    "get_saver",
]
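As a usage sketch, the snippet below wires the pieces together through this package-level export, assuming a requester, parser, saver, and `DownloaderConfig` have already been built (their factories are not shown in this part of the diff):

from typing import List

from novel_downloader.core.factory import get_downloader


def run_download(requester, parser, saver, config, book_ids: List[str]) -> None:
    # get_downloader dispatches on the site key (see downloader_factory below).
    downloader = get_downloader(requester, parser, saver, site="qidian", config=config)
    # The inherited batch loop iterates over the ids and logs per-book failures.
    downloader.download(book_ids)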
novel_downloader/core/factory/downloader_factory.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
novel_downloader.core.factory.downloader_factory
------------------------------------------------

This module implements a factory function for creating downloader instances
based on the site name and parser mode specified in the configuration.

Currently supported:
- Site: 'qidian'
    - QidianDownloader

To add support for new sites or modes, extend the `_site_map` accordingly.
"""

from novel_downloader.config import DownloaderConfig, load_site_rules
from novel_downloader.core.downloaders import (
    CommonDownloader,
    QidianDownloader,
)
from novel_downloader.core.interfaces import (
    DownloaderProtocol,
    ParserProtocol,
    RequesterProtocol,
    SaverProtocol,
)

_site_map = {
    "qidian": QidianDownloader,
    # "biquge": ...
}


def get_downloader(
    requester: RequesterProtocol,
    parser: ParserProtocol,
    saver: SaverProtocol,
    site: str,
    config: DownloaderConfig,
) -> DownloaderProtocol:
    """
    Returns a site-specific downloader instance.

    :param requester: Requester implementation
    :param parser: Parser implementation
    :param saver: Saver implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration
    :return: An instance of a downloader class
    """
    site_key = site.lower()

    if site_key in _site_map:
        downloader_class = _site_map[site_key]
        return downloader_class(requester, parser, saver, config)

    site_rules = load_site_rules()
    if site_key not in site_rules:
        raise ValueError(f"Unsupported site: {site}")

    return CommonDownloader(requester, parser, saver, config, site_key)
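The module docstring notes that new sites with dedicated behaviour are added by extending `_site_map`. A sketch of what that could look like inside downloader_factory.py itself (so the existing imports above are in scope), using a hypothetical `BiqugeDownloader` that echoes the commented-out "biquge" entry; sites without a dedicated class continue to fall through to `CommonDownloader` via `load_site_rules()`:

class BiqugeDownloader(CommonDownloader):
    """Hypothetical site-specific downloader. It pins the site key so that it
    matches the four-argument call used for _site_map entries in get_downloader()."""

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        super().__init__(requester, parser, saver, config, site="biquge")


_site_map["biquge"] = BiqugeDownloader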