novel-downloader 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +14 -0
- novel_downloader/cli/__init__.py +14 -0
- novel_downloader/cli/clean.py +134 -0
- novel_downloader/cli/download.py +132 -0
- novel_downloader/cli/interactive.py +67 -0
- novel_downloader/cli/main.py +45 -0
- novel_downloader/cli/settings.py +177 -0
- novel_downloader/config/__init__.py +52 -0
- novel_downloader/config/adapter.py +153 -0
- novel_downloader/config/loader.py +177 -0
- novel_downloader/config/models.py +173 -0
- novel_downloader/config/site_rules.py +97 -0
- novel_downloader/core/__init__.py +25 -0
- novel_downloader/core/downloaders/__init__.py +22 -0
- novel_downloader/core/downloaders/base_async_downloader.py +157 -0
- novel_downloader/core/downloaders/base_downloader.py +187 -0
- novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
- novel_downloader/core/downloaders/common_downloader.py +191 -0
- novel_downloader/core/downloaders/qidian_downloader.py +208 -0
- novel_downloader/core/factory/__init__.py +33 -0
- novel_downloader/core/factory/downloader_factory.py +149 -0
- novel_downloader/core/factory/parser_factory.py +62 -0
- novel_downloader/core/factory/requester_factory.py +106 -0
- novel_downloader/core/factory/saver_factory.py +49 -0
- novel_downloader/core/interfaces/__init__.py +32 -0
- novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
- novel_downloader/core/interfaces/downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/parser_protocol.py +40 -0
- novel_downloader/core/interfaces/requester_protocol.py +65 -0
- novel_downloader/core/interfaces/saver_protocol.py +61 -0
- novel_downloader/core/parsers/__init__.py +28 -0
- novel_downloader/core/parsers/base_parser.py +96 -0
- novel_downloader/core/parsers/common_parser/__init__.py +14 -0
- novel_downloader/core/parsers/common_parser/helper.py +321 -0
- novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
- novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
- novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
- novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
- novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
- novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
- novel_downloader/core/requesters/__init__.py +31 -0
- novel_downloader/core/requesters/base_async_session.py +297 -0
- novel_downloader/core/requesters/base_browser.py +210 -0
- novel_downloader/core/requesters/base_session.py +243 -0
- novel_downloader/core/requesters/common_requester/__init__.py +18 -0
- novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
- novel_downloader/core/requesters/common_requester/common_session.py +126 -0
- novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
- novel_downloader/core/savers/__init__.py +20 -0
- novel_downloader/core/savers/base_saver.py +169 -0
- novel_downloader/core/savers/common_saver/__init__.py +13 -0
- novel_downloader/core/savers/common_saver/common_epub.py +232 -0
- novel_downloader/core/savers/common_saver/common_txt.py +176 -0
- novel_downloader/core/savers/common_saver/main_saver.py +86 -0
- novel_downloader/core/savers/epub_utils/__init__.py +27 -0
- novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
- novel_downloader/core/savers/epub_utils/initializer.py +98 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
- novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
- novel_downloader/core/savers/qidian_saver.py +22 -0
- novel_downloader/locales/en.json +91 -0
- novel_downloader/locales/zh.json +91 -0
- novel_downloader/resources/config/rules.toml +196 -0
- novel_downloader/resources/config/settings.yaml +73 -0
- novel_downloader/resources/css_styles/main.css +104 -0
- novel_downloader/resources/css_styles/volume-intro.css +56 -0
- novel_downloader/resources/images/volume_border.png +0 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
- novel_downloader/resources/json/replace_word_map.json +4 -0
- novel_downloader/resources/text/blacklist.txt +22 -0
- novel_downloader/utils/__init__.py +0 -0
- novel_downloader/utils/cache.py +24 -0
- novel_downloader/utils/constants.py +158 -0
- novel_downloader/utils/crypto_utils.py +144 -0
- novel_downloader/utils/file_utils/__init__.py +43 -0
- novel_downloader/utils/file_utils/io.py +252 -0
- novel_downloader/utils/file_utils/normalize.py +68 -0
- novel_downloader/utils/file_utils/sanitize.py +77 -0
- novel_downloader/utils/fontocr/__init__.py +23 -0
- novel_downloader/utils/fontocr/ocr_v1.py +304 -0
- novel_downloader/utils/fontocr/ocr_v2.py +658 -0
- novel_downloader/utils/hash_store.py +288 -0
- novel_downloader/utils/hash_utils.py +103 -0
- novel_downloader/utils/i18n.py +41 -0
- novel_downloader/utils/logger.py +104 -0
- novel_downloader/utils/model_loader.py +72 -0
- novel_downloader/utils/network.py +287 -0
- novel_downloader/utils/state.py +156 -0
- novel_downloader/utils/text_utils/__init__.py +27 -0
- novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
- novel_downloader/utils/text_utils/diff_display.py +75 -0
- novel_downloader/utils/text_utils/font_mapping.py +31 -0
- novel_downloader/utils/text_utils/text_cleaning.py +57 -0
- novel_downloader/utils/time_utils/__init__.py +22 -0
- novel_downloader/utils/time_utils/datetime_utils.py +146 -0
- novel_downloader/utils/time_utils/sleep_utils.py +49 -0
- novel_downloader-1.1.0.dist-info/METADATA +157 -0
- novel_downloader-1.1.0.dist-info/RECORD +115 -0
- novel_downloader-1.1.0.dist-info/WHEEL +5 -0
- novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
- novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
- novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,191 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.downloaders.common_downloader
|
5
|
+
---------------------------------------------------
|
6
|
+
|
7
|
+
This module defines `CommonDownloader`.
|
8
|
+
"""
|
9
|
+
|
10
|
+
import json
|
11
|
+
import logging
|
12
|
+
from typing import Any, Dict
|
13
|
+
|
14
|
+
from novel_downloader.config import DownloaderConfig
|
15
|
+
from novel_downloader.core.interfaces import (
|
16
|
+
ParserProtocol,
|
17
|
+
RequesterProtocol,
|
18
|
+
SaverProtocol,
|
19
|
+
)
|
20
|
+
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
|
21
|
+
from novel_downloader.utils.network import download_image_as_bytes
|
22
|
+
from novel_downloader.utils.time_utils import calculate_time_difference
|
23
|
+
|
24
|
+
from .base_downloader import BaseDownloader
|
25
|
+
|
26
|
+
logger = logging.getLogger(__name__)
|
27
|
+
|
28
|
+
|
29
|
+
class CommonDownloader(BaseDownloader):
    """
    Specialized downloader for common novels.

    Implements the full synchronous download workflow for a single book:
    load or refresh the cached book metadata, download the cover image
    (best effort), fetch/parse/persist every chapter listed in the book's
    volumes, then hand the book off to the saver for final export.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Initialize the common novel downloader with site information.

        :param requester: Object implementing RequesterProtocol, used to fetch raw data.
        :param parser: Object implementing ParserProtocol, used to parse page content.
        :param saver: Object implementing SaverProtocol, used to save final output.
        :param config: Downloader configuration object.
        :param site: Identifier for the site the downloader is targeting.
        """
        super().__init__(requester, parser, saver, config)
        self._site = site

    def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Steps:
          1. Reuse cached ``book_info.json`` when present and fresh
             (updated less than one day ago); otherwise re-fetch and
             re-parse the book info page.
          2. Download the cover image; a failure is logged, not fatal.
          3. Iterate volumes/chapters, fetching and parsing each chapter
             and saving it as JSON. Existing chapter files are skipped
             when ``config.skip_existing`` is set.
          4. Call the configured saver to produce the final output.

        :param book_id: The identifier of the book to download.
        """
        TAG = "[Downloader]"
        save_html = self.config.save_html
        skip_existing = self.config.skip_existing
        site = self.site
        wait_time = self.config.request_interval

        raw_base = self.raw_data_dir / site / book_id
        cache_base = self.cache_dir / site / book_id
        info_path = raw_base / "book_info.json"
        chapter_dir = raw_base / "chapters"
        # chapters_html_dir is only bound when save_html is set; every later
        # use below is guarded by the same flag.
        if save_html:
            chapters_html_dir = cache_base / "html"

        raw_base.mkdir(parents=True, exist_ok=True)
        chapter_dir.mkdir(parents=True, exist_ok=True)

        book_info: Dict[str, Any]

        # Cache probe: FileNotFoundError is deliberately used as a local
        # control-flow signal ("no usable cache") to jump into the re-fetch
        # branch; the broad `except Exception` also catches corrupt JSON or
        # an unparsable update_time.
        try:
            if not info_path.exists():
                raise FileNotFoundError
            book_info = json.loads(info_path.read_text(encoding="utf-8"))
            # update_time is interpreted in UTC+8 (the site's local time).
            days, hrs, mins, secs = calculate_time_difference(
                book_info.get("update_time", ""), "UTC+8"
            )
            logger.info(
                "%s Last updated %dd %dh %dm %ds ago", TAG, days, hrs, mins, secs
            )
            if days > 1:
                raise FileNotFoundError  # trigger re-fetch
        except Exception:
            # Cache missing, corrupt, or stale: fetch and parse fresh metadata.
            info_html = self.requester.get_book_info(book_id, wait_time)
            if save_html:
                info_html_path = chapters_html_dir / "info.html"
                save_as_txt(info_html, info_html_path)
            book_info = self.parser.parse_book_info(info_html)
            # The parser uses these sentinel strings ("book name not found" /
            # "update time not found") to signal extraction failure; only
            # persist metadata that looks valid.
            if (
                book_info.get("book_name", "") != "未找到书名"
                and book_info.get("update_time", "") != "未找到更新时间"
            ):
                save_as_json(book_info, info_path)

        # download cover (best effort; failure is logged but not fatal)
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            cover_bytes = download_image_as_bytes(cover_url, raw_base)
            if not cover_bytes:
                logger.warning("%s Failed to download cover: %s", TAG, cover_url)

        # enqueue chapters
        for vol in book_info.get("volumes", []):
            vol_name = vol.get("volume_name", "")
            logger.info("%s Enqueuing volume: %s", TAG, vol_name)

            for chap in vol.get("chapters", []):
                cid = chap.get("chapterId")
                if not cid:
                    logger.warning("%s Skipping chapter without chapterId", TAG)
                    continue

                chap_path = chapter_dir / f"{cid}.json"
                if chap_path.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s",
                        TAG,
                        cid,
                    )
                    continue

                chap_title = chap.get("title", "")
                logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
                # Fetch and parse are wrapped together so that one failing
                # chapter never aborts the whole book download.
                try:
                    chap_html = self.requester.get_book_chapter(book_id, cid, wait_time)

                    if save_html:
                        html_path = chapters_html_dir / f"{cid}.html"
                        save_as_txt(chap_html, html_path, on_exist="skip")
                        logger.debug(
                            "%s Saved raw HTML for chapter %s to %s",
                            TAG,
                            cid,
                            html_path,
                        )

                    chap_json = self.parser.parse_chapter(chap_html, cid)
                    if not chap_json:
                        logger.warning(
                            "%s Parsed chapter json is empty, skipping: %s (%s)",
                            TAG,
                            chap_title,
                            cid,
                        )
                        continue
                except Exception as e:
                    logger.warning(
                        "%s Error while processing chapter %s (%s): %s",
                        TAG,
                        chap_title,
                        cid,
                        str(e),
                    )
                    continue

                save_as_json(chap_json, chap_path)
                logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)

        # Delegate final export (txt/epub/...) to the configured saver.
        self.saver.save(book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return

    @property
    def site(self) -> str:
        """
        Get the site identifier.

        :return: The site string.
        """
        return self._site

    @site.setter
    def site(self, value: str) -> None:
        """
        Set the site identifier.

        :param value: New site string to set.
        """
        self._site = value
|
@@ -0,0 +1,208 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.downloaders.qidian_downloader
|
5
|
+
---------------------------------------------------
|
6
|
+
|
7
|
+
This module defines `QidianDownloader`, a platform-specific downloader
|
8
|
+
implementation for retrieving novels from Qidian (起点中文网).
|
9
|
+
"""
|
10
|
+
|
11
|
+
import json
|
12
|
+
import logging
|
13
|
+
from typing import Any, Dict
|
14
|
+
|
15
|
+
from novel_downloader.config import DownloaderConfig
|
16
|
+
from novel_downloader.core.interfaces import (
|
17
|
+
ParserProtocol,
|
18
|
+
RequesterProtocol,
|
19
|
+
SaverProtocol,
|
20
|
+
)
|
21
|
+
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
|
22
|
+
from novel_downloader.utils.network import download_image_as_bytes
|
23
|
+
from novel_downloader.utils.state import state_mgr
|
24
|
+
from novel_downloader.utils.time_utils import calculate_time_difference
|
25
|
+
|
26
|
+
from .base_downloader import BaseDownloader
|
27
|
+
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
|
30
|
+
|
31
|
+
class QidianDownloader(BaseDownloader):
    """
    Specialized downloader for Qidian novels.

    In addition to the common fetch/parse/save workflow, this class:
      * performs login on construction (automatic first, manual fallback)
        and persists the manual-login flag for the next run;
      * routes each chapter into a plain or encrypted storage folder based
        on the parser's encryption detection.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Initialize the Qidian downloader and attempt login immediately.

        :param requester: Object implementing RequesterProtocol, used to fetch raw data.
        :param parser: Object implementing ParserProtocol, used to parse page content.
        :param saver: Object implementing SaverProtocol, used to save final output.
        :param config: Downloader configuration object.
        """
        super().__init__(requester, parser, saver, config)

        self._site_key = "qidian"
        self._is_logged_in = self._handle_login()
        # Persist whether manual login will be required on the next run:
        # if this login failed, force the manual path next time.
        state_mgr.set_manual_login_flag(self._site_key, not self._is_logged_in)

    def _handle_login(self) -> bool:
        """
        Perform login with automatic fallback to manual:

        1. If manual_flag is False, try automatic login:
           - On success, return True immediately.
        2. Always attempt manual login if manual_flag is True.
        3. Return True if manual login succeeds, False otherwise.
        """
        manual_flag = state_mgr.get_manual_login_flag(self._site_key)

        # First try automatic login
        if not manual_flag:
            # NOTE(review): uses the private `_requester` attribute here while
            # download_one uses the `requester` property — presumably the same
            # object; confirm in BaseDownloader.
            if self._requester.login(manual_login=False):
                return True

        # try manual login
        return self._requester.login(manual_login=True)

    def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Requires a successful login (checked first); otherwise the download
        is skipped with a warning. Chapters detected as encrypted by the
        parser are stored under ``encrypted_chapters`` instead of
        ``chapters``.

        :param book_id: The identifier of the book to download.
        """
        if not self._is_logged_in:
            logger.warning(
                f"[{self._site_key}] login failed, skipping download of {book_id}"
            )
            return

        TAG = "[Downloader]"
        save_html = self.config.save_html
        skip_existing = self.config.skip_existing
        wait_time = self.config.request_interval

        raw_base = self.raw_data_dir / "qidian" / book_id
        cache_base = self.cache_dir / "qidian" / book_id
        info_path = raw_base / "book_info.json"
        chapter_dir = raw_base / "chapters"
        encrypted_chapter_dir = raw_base / "encrypted_chapters"
        # chapters_html_dir is only bound when save_html is set; later uses
        # are guarded by the same flag.
        if save_html:
            chapters_html_dir = cache_base / "html"

        raw_base.mkdir(parents=True, exist_ok=True)
        chapter_dir.mkdir(parents=True, exist_ok=True)
        encrypted_chapter_dir.mkdir(parents=True, exist_ok=True)

        book_info: Dict[str, Any]

        # Cache probe: FileNotFoundError is deliberately used as a local
        # control-flow signal ("no usable cache") to jump into the re-fetch
        # branch; the broad `except Exception` also catches corrupt JSON or
        # an unparsable update_time.
        try:
            if not info_path.exists():
                raise FileNotFoundError
            book_info = json.loads(info_path.read_text(encoding="utf-8"))
            # update_time is interpreted in UTC+8 (the site's local time).
            days, hrs, mins, secs = calculate_time_difference(
                book_info.get("update_time", ""), "UTC+8"
            )
            logger.info(
                "%s Last updated %dd %dh %dm %ds ago", TAG, days, hrs, mins, secs
            )
            if days > 1:
                raise FileNotFoundError  # trigger re-fetch
        except Exception:
            # Cache missing, corrupt, or stale: fetch and parse fresh metadata.
            info_html = self.requester.get_book_info(book_id, wait_time)
            if save_html:
                info_html_path = chapters_html_dir / "info.html"
                save_as_txt(info_html, info_html_path)
            book_info = self.parser.parse_book_info(info_html)
            # The parser uses these sentinel strings ("book name not found" /
            # "update time not found") to signal extraction failure; only
            # persist metadata that looks valid.
            if (
                book_info.get("book_name", "") != "未找到书名"
                and book_info.get("update_time", "") != "未找到更新时间"
            ):
                save_as_json(book_info, info_path)

        # download cover (best effort; failure is logged but not fatal)
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            cover_bytes = download_image_as_bytes(cover_url, raw_base)
            if not cover_bytes:
                logger.warning("%s Failed to download cover: %s", TAG, cover_url)

        # enqueue chapters
        for vol in book_info.get("volumes", []):
            vol_name = vol.get("volume_name", "")
            logger.info("%s Enqueuing volume: %s", TAG, vol_name)

            for chap in vol.get("chapters", []):
                cid = chap.get("chapterId")
                if not cid:
                    logger.warning("%s Skipping chapter without chapterId", TAG)
                    continue

                # Pre-fetch skip check: only the *plain* chapter folder can be
                # consulted here, because encryption is unknown until the page
                # is fetched; encrypted chapters are re-checked after routing.
                chap_path = chapter_dir / f"{cid}.json"

                if chap_path.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s",
                        TAG,
                        cid,
                    )
                    continue

                chap_title = chap.get("title", "")
                logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
                chap_html = self.requester.get_book_chapter(book_id, cid, wait_time)

                # Parser decides whether this page uses Qidian's encryption.
                is_encrypted = self.parser.is_encrypted(chap_html)  # type: ignore[attr-defined]

                # Route the chapter into the folder matching its encryption
                # status, then repeat the skip check against the real target.
                folder = encrypted_chapter_dir if is_encrypted else chapter_dir
                chap_path = folder / f"{cid}.json"

                if chap_path.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s",
                        TAG,
                        cid,
                    )
                    continue

                # Raw HTML is only cached for non-VIP pages.
                if save_html and not is_vip(chap_html):
                    folder = chapters_html_dir / (
                        "html_encrypted" if is_encrypted else "html_plain"
                    )
                    html_path = folder / f"{cid}.html"
                    save_as_txt(chap_html, html_path, on_exist="skip")
                    logger.debug(
                        "%s Saved raw HTML for chapter %s to %s", TAG, cid, html_path
                    )

                chap_json = self.parser.parse_chapter(chap_html, cid)
                if not chap_json:
                    logger.warning(
                        "%s Parsed chapter json is empty, skipping: %s (%s)",
                        TAG,
                        chap_title,
                        cid,
                    )
                    continue

                save_as_json(chap_json, chap_path)
                logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)

        # Delegate final export (txt/epub/...) to the configured saver.
        self.saver.save(book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return
|
199
|
+
|
200
|
+
|
201
|
+
def is_vip(html_str: str) -> bool:
    """
    Return True if page indicates VIP-only content.

    Detection is a plain substring scan for known subscription-wall
    phrases used by the site.

    :param html_str: Raw HTML string.
    :return: True when any VIP/subscription marker is found.
    """
    for marker in ("这是VIP章节", "需要订阅", "订阅后才能阅读"):
        if marker in html_str:
            return True
    return False
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.factory
|
5
|
+
-----------------------------
|
6
|
+
|
7
|
+
This package provides factory methods for dynamically retrieving components
|
8
|
+
based on runtime parameters such as site name or content type.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from .downloader_factory import (
|
12
|
+
get_async_downloader,
|
13
|
+
get_downloader,
|
14
|
+
get_sync_downloader,
|
15
|
+
)
|
16
|
+
from .parser_factory import get_parser
|
17
|
+
from .requester_factory import (
|
18
|
+
get_async_requester,
|
19
|
+
get_requester,
|
20
|
+
get_sync_requester,
|
21
|
+
)
|
22
|
+
from .saver_factory import get_saver
|
23
|
+
|
24
|
+
__all__ = [
|
25
|
+
"get_async_downloader",
|
26
|
+
"get_downloader",
|
27
|
+
"get_sync_downloader",
|
28
|
+
"get_parser",
|
29
|
+
"get_async_requester",
|
30
|
+
"get_requester",
|
31
|
+
"get_sync_requester",
|
32
|
+
"get_saver",
|
33
|
+
]
|
@@ -0,0 +1,149 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.factory.downloader_factory
|
5
|
+
------------------------------------------------
|
6
|
+
|
7
|
+
This module implements a factory function for creating downloader instances
|
8
|
+
based on the site name and parser mode specified in the configuration.
|
9
|
+
|
10
|
+
- get_async_downloader -> always returns a CommonAsyncDownloader
|
11
|
+
- get_sync_downloader -> returns a site-specific downloader or CommonDownloader
|
12
|
+
- get_downloader -> dispatches to one of the above based on config.mode
|
13
|
+
|
14
|
+
To add support for new sites or modes, extend the `_site_map` accordingly.
|
15
|
+
"""
|
16
|
+
|
17
|
+
from typing import Union
|
18
|
+
|
19
|
+
from novel_downloader.config import DownloaderConfig, load_site_rules
|
20
|
+
from novel_downloader.core.downloaders import (
|
21
|
+
CommonAsyncDownloader,
|
22
|
+
CommonDownloader,
|
23
|
+
QidianDownloader,
|
24
|
+
)
|
25
|
+
from novel_downloader.core.interfaces import (
|
26
|
+
AsyncDownloaderProtocol,
|
27
|
+
AsyncRequesterProtocol,
|
28
|
+
DownloaderProtocol,
|
29
|
+
ParserProtocol,
|
30
|
+
RequesterProtocol,
|
31
|
+
SaverProtocol,
|
32
|
+
)
|
33
|
+
|
34
|
+
_site_map = {
|
35
|
+
"qidian": QidianDownloader,
|
36
|
+
# "biquge": ...
|
37
|
+
}
|
38
|
+
|
39
|
+
|
40
|
+
def get_async_downloader(
    requester: AsyncRequesterProtocol,
    parser: ParserProtocol,
    saver: SaverProtocol,
    site: str,
    config: DownloaderConfig,
) -> AsyncDownloaderProtocol:
    """
    Returns an AsyncDownloaderProtocol for the given site.

    The async path always uses CommonAsyncDownloader; the site name is
    only validated against the loaded rule set.

    :param requester: Requester implementation
    :param parser: Parser implementation
    :param saver: Saver implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration

    :return: An instance of a downloader class

    :raises ValueError: If the site is not covered by the rule set.
    :raises TypeError: If the provided requester does not implement
        AsyncRequesterProtocol.
    """
    site_key = site.lower()

    if not isinstance(requester, AsyncRequesterProtocol):
        raise TypeError("Async mode requires an AsyncRequesterProtocol")

    # Validate the site before constructing anything.
    if load_site_rules().get(site_key) is None:
        raise ValueError(f"Unsupported site: {site}")

    return CommonAsyncDownloader(requester, parser, saver, config, site_key)
|
73
|
+
|
74
|
+
|
75
|
+
def get_sync_downloader(
    requester: RequesterProtocol,
    parser: ParserProtocol,
    saver: SaverProtocol,
    site: str,
    config: DownloaderConfig,
) -> DownloaderProtocol:
    """
    Returns a DownloaderProtocol for the given site.
    First tries a site-specific downloader (e.g. QidianDownloader),
    otherwise falls back to CommonDownloader.

    :param requester: Requester implementation
    :param parser: Parser implementation
    :param saver: Saver implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration

    :return: An instance of a downloader class

    :raises ValueError: If the site has no dedicated downloader and no rule.
    :raises TypeError: If the provided requester does not implement
        RequesterProtocol.
    """
    site_key = site.lower()

    if not isinstance(requester, RequesterProtocol):
        raise TypeError("Sync mode requires a RequesterProtocol")

    # Prefer a dedicated downloader when one is registered for this site.
    dedicated_cls = _site_map.get(site_key)
    if dedicated_cls is not None:
        return dedicated_cls(requester, parser, saver, config)

    # Otherwise the site must at least have a rule entry to fall back on.
    if load_site_rules().get(site_key) is None:
        raise ValueError(f"Unsupported site: {site}")

    return CommonDownloader(requester, parser, saver, config, site_key)
|
115
|
+
|
116
|
+
|
117
|
+
def get_downloader(
    requester: Union[AsyncRequesterProtocol, RequesterProtocol],
    parser: ParserProtocol,
    saver: SaverProtocol,
    site: str,
    config: DownloaderConfig,
) -> Union[AsyncDownloaderProtocol, DownloaderProtocol]:
    """
    Dispatches to get_async_downloader if config.mode == 'async',
    otherwise to get_sync_downloader for 'browser'/'session' modes.

    :param requester: Requester implementation
    :param parser: Parser implementation
    :param saver: Saver implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration

    :return: An instance of a downloader class

    :raises ValueError: If config.mode is none of 'async', 'browser', 'session'.
    :raises TypeError: If the provided requester does not match the required
        protocol for the chosen mode (sync vs async).
    """
    normalized_mode = config.mode.lower()

    if normalized_mode == "async":
        # Requester protocol is verified here as well as in the target
        # factory, so a direct call to either entry point stays safe.
        if not isinstance(requester, AsyncRequesterProtocol):
            raise TypeError("Async mode requires an AsyncRequesterProtocol")
        return get_async_downloader(requester, parser, saver, site, config)

    if normalized_mode in {"browser", "session"}:
        if not isinstance(requester, RequesterProtocol):
            raise TypeError("Sync mode requires a RequesterProtocol")
        return get_sync_downloader(requester, parser, saver, site, config)

    raise ValueError(f"Unknown mode '{config.mode}' for site '{site}'")
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.factory.parser_factory
|
5
|
+
--------------------------------------------
|
6
|
+
|
7
|
+
This module implements a factory function for creating parser instances
|
8
|
+
based on the site name and parser mode specified in the configuration.
|
9
|
+
|
10
|
+
Currently supported:
|
11
|
+
- Site: 'qidian'
|
12
|
+
- Modes:
|
13
|
+
- 'browser': QidianBrowserParser
|
14
|
+
- 'session': (Not implemented yet)
|
15
|
+
|
16
|
+
To add support for new sites or modes, extend the `_site_map` accordingly.
|
17
|
+
"""
|
18
|
+
|
19
|
+
from novel_downloader.config import ParserConfig, load_site_rules
|
20
|
+
from novel_downloader.core.interfaces import ParserProtocol
|
21
|
+
from novel_downloader.core.parsers import (
|
22
|
+
CommonParser,
|
23
|
+
QidianBrowserParser,
|
24
|
+
QidianSessionParser,
|
25
|
+
)
|
26
|
+
|
27
|
+
_site_map = {
|
28
|
+
"qidian": {
|
29
|
+
"browser": QidianBrowserParser,
|
30
|
+
"session": QidianSessionParser,
|
31
|
+
},
|
32
|
+
# "biquge": ...
|
33
|
+
}
|
34
|
+
|
35
|
+
|
36
|
+
def get_parser(site: str, config: ParserConfig) -> ParserProtocol:
    """
    Returns a site-specific parser instance.

    Resolution order: a dedicated parser registered in ``_site_map``
    (optionally keyed by ``config.mode``), else a CommonParser driven
    by the site's rule definition.

    :param site: Site name (e.g., 'qidian')
    :param config: Configuration for the parser
    :return: An instance of a parser class
    :raises ValueError: If the site or the requested mode is unsupported.
    """
    site_key = site.lower()

    site_entry = _site_map.get(site_key)
    if site_entry is not None:
        if isinstance(site_entry, dict):
            # Mode-specific registration (e.g. browser vs session parser).
            parser_class = site_entry.get(config.mode)
            if parser_class is None:
                raise ValueError(f"Unsupported mode '{config.mode}' for site '{site}'")
        else:
            parser_class = site_entry
        return parser_class(config)

    # Fallback: site not mapped specially, try to load rule
    site_rule = load_site_rules().get(site_key)
    if site_rule is None:
        raise ValueError(f"Unsupported site: {site}")

    return CommonParser(config, site_key, site_rule)
|