novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -2
- novel_downloader/cli/__init__.py +0 -1
- novel_downloader/cli/clean.py +2 -10
- novel_downloader/cli/download.py +18 -22
- novel_downloader/cli/interactive.py +0 -1
- novel_downloader/cli/main.py +1 -3
- novel_downloader/cli/settings.py +8 -8
- novel_downloader/config/__init__.py +0 -1
- novel_downloader/config/adapter.py +48 -18
- novel_downloader/config/loader.py +116 -108
- novel_downloader/config/models.py +41 -32
- novel_downloader/config/site_rules.py +2 -4
- novel_downloader/core/__init__.py +0 -1
- novel_downloader/core/downloaders/__init__.py +4 -4
- novel_downloader/core/downloaders/base/__init__.py +14 -0
- novel_downloader/core/downloaders/{base_async_downloader.py → base/base_async.py} +49 -53
- novel_downloader/core/downloaders/{base_downloader.py → base/base_sync.py} +64 -43
- novel_downloader/core/downloaders/biquge/__init__.py +12 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +25 -0
- novel_downloader/core/downloaders/common/__init__.py +14 -0
- novel_downloader/core/downloaders/{common_asynb_downloader.py → common/common_async.py} +42 -33
- novel_downloader/core/downloaders/{common_downloader.py → common/common_sync.py} +34 -23
- novel_downloader/core/downloaders/qidian/__init__.py +10 -0
- novel_downloader/core/downloaders/{qidian_downloader.py → qidian/qidian_sync.py} +80 -64
- novel_downloader/core/factory/__init__.py +4 -5
- novel_downloader/core/factory/{downloader_factory.py → downloader.py} +36 -35
- novel_downloader/core/factory/{parser_factory.py → parser.py} +12 -14
- novel_downloader/core/factory/{requester_factory.py → requester.py} +29 -16
- novel_downloader/core/factory/{saver_factory.py → saver.py} +4 -9
- novel_downloader/core/interfaces/__init__.py +8 -9
- novel_downloader/core/interfaces/{async_downloader_protocol.py → async_downloader.py} +4 -5
- novel_downloader/core/interfaces/{async_requester_protocol.py → async_requester.py} +26 -12
- novel_downloader/core/interfaces/{parser_protocol.py → parser.py} +11 -6
- novel_downloader/core/interfaces/{saver_protocol.py → saver.py} +2 -3
- novel_downloader/core/interfaces/{downloader_protocol.py → sync_downloader.py} +6 -7
- novel_downloader/core/interfaces/{requester_protocol.py → sync_requester.py} +34 -17
- novel_downloader/core/parsers/__init__.py +5 -4
- novel_downloader/core/parsers/{base_parser.py → base.py} +20 -11
- novel_downloader/core/parsers/biquge/__init__.py +10 -0
- novel_downloader/core/parsers/biquge/main_parser.py +126 -0
- novel_downloader/core/parsers/{common_parser → common}/__init__.py +2 -3
- novel_downloader/core/parsers/{common_parser → common}/helper.py +20 -18
- novel_downloader/core/parsers/{common_parser → common}/main_parser.py +15 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_encrypted.py +41 -49
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_normal.py +17 -21
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py +2 -3
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py +37 -45
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py +19 -23
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py +10 -9
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py +16 -12
- novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py +7 -10
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py +2 -3
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +150 -0
- novel_downloader/core/parsers/{qidian_parser → qidian}/shared/helpers.py +9 -10
- novel_downloader/core/requesters/__init__.py +9 -5
- novel_downloader/core/requesters/base/__init__.py +16 -0
- novel_downloader/core/requesters/{base_async_session.py → base/async_session.py} +180 -73
- novel_downloader/core/requesters/base/browser.py +340 -0
- novel_downloader/core/requesters/base/session.py +364 -0
- novel_downloader/core/requesters/biquge/__init__.py +12 -0
- novel_downloader/core/requesters/biquge/session.py +90 -0
- novel_downloader/core/requesters/{common_requester → common}/__init__.py +4 -5
- novel_downloader/core/requesters/common/async_session.py +96 -0
- novel_downloader/core/requesters/common/session.py +113 -0
- novel_downloader/core/requesters/qidian/__init__.py +21 -0
- novel_downloader/core/requesters/qidian/broswer.py +306 -0
- novel_downloader/core/requesters/qidian/session.py +287 -0
- novel_downloader/core/savers/__init__.py +5 -3
- novel_downloader/core/savers/{base_saver.py → base.py} +12 -13
- novel_downloader/core/savers/biquge.py +25 -0
- novel_downloader/core/savers/{common_saver → common}/__init__.py +2 -3
- novel_downloader/core/savers/{common_saver/common_epub.py → common/epub.py} +24 -52
- novel_downloader/core/savers/{common_saver → common}/main_saver.py +43 -9
- novel_downloader/core/savers/{common_saver/common_txt.py → common/txt.py} +16 -46
- novel_downloader/core/savers/epub_utils/__init__.py +0 -1
- novel_downloader/core/savers/epub_utils/css_builder.py +13 -7
- novel_downloader/core/savers/epub_utils/initializer.py +4 -5
- novel_downloader/core/savers/epub_utils/text_to_html.py +2 -3
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -3
- novel_downloader/core/savers/{qidian_saver.py → qidian.py} +12 -6
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +9 -1
- novel_downloader/resources/config/settings.toml +88 -0
- novel_downloader/utils/cache.py +2 -2
- novel_downloader/utils/chapter_storage.py +340 -0
- novel_downloader/utils/constants.py +8 -5
- novel_downloader/utils/crypto_utils.py +3 -3
- novel_downloader/utils/file_utils/__init__.py +0 -1
- novel_downloader/utils/file_utils/io.py +12 -17
- novel_downloader/utils/file_utils/normalize.py +1 -3
- novel_downloader/utils/file_utils/sanitize.py +2 -9
- novel_downloader/utils/fontocr/__init__.py +0 -1
- novel_downloader/utils/fontocr/ocr_v1.py +19 -22
- novel_downloader/utils/fontocr/ocr_v2.py +147 -60
- novel_downloader/utils/hash_store.py +19 -20
- novel_downloader/utils/hash_utils.py +0 -1
- novel_downloader/utils/i18n.py +3 -4
- novel_downloader/utils/logger.py +5 -6
- novel_downloader/utils/model_loader.py +5 -8
- novel_downloader/utils/network.py +9 -10
- novel_downloader/utils/state.py +6 -7
- novel_downloader/utils/text_utils/__init__.py +0 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +2 -7
- novel_downloader/utils/text_utils/diff_display.py +0 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -4
- novel_downloader/utils/text_utils/text_cleaning.py +0 -1
- novel_downloader/utils/time_utils/__init__.py +0 -1
- novel_downloader/utils/time_utils/datetime_utils.py +9 -11
- novel_downloader/utils/time_utils/sleep_utils.py +27 -13
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/METADATA +14 -17
- novel_downloader-1.3.0.dist-info/RECORD +127 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/WHEEL +1 -1
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -95
- novel_downloader/core/requesters/base_browser.py +0 -210
- novel_downloader/core/requesters/base_session.py +0 -243
- novel_downloader/core/requesters/common_requester/common_async_session.py +0 -98
- novel_downloader/core/requesters/common_requester/common_session.py +0 -126
- novel_downloader/core/requesters/qidian_requester/__init__.py +0 -22
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -377
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +0 -202
- novel_downloader/resources/config/settings.yaml +0 -76
- novel_downloader-1.2.1.dist-info/RECORD +0 -115
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.1.dist-info → novel_downloader-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.browser.main_parser
|
4
|
+
--------------------------------------------------------
|
6
5
|
|
7
6
|
Main parser class for handling Qidian chapters rendered via a browser environment.
|
8
7
|
|
@@ -13,10 +12,11 @@ content extracted from dynamically rendered Qidian HTML pages.
|
|
13
12
|
from __future__ import annotations
|
14
13
|
|
15
14
|
from pathlib import Path
|
16
|
-
from typing import TYPE_CHECKING, Any
|
15
|
+
from typing import TYPE_CHECKING, Any
|
17
16
|
|
18
17
|
from novel_downloader.config.models import ParserConfig
|
19
|
-
from novel_downloader.core.parsers.
|
18
|
+
from novel_downloader.core.parsers.base import BaseParser
|
19
|
+
from novel_downloader.utils.chapter_storage import ChapterDict
|
20
20
|
|
21
21
|
from ..shared import (
|
22
22
|
is_encrypted,
|
@@ -47,9 +47,9 @@ class QidianBrowserParser(BaseParser):
|
|
47
47
|
|
48
48
|
self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
|
49
49
|
self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
|
50
|
-
self._font_debug_dir:
|
50
|
+
self._font_debug_dir: Path | None = None
|
51
51
|
|
52
|
-
self._font_ocr:
|
52
|
+
self._font_ocr: FontOCR | None = None
|
53
53
|
if self._decode_font:
|
54
54
|
from novel_downloader.utils.fontocr import FontOCR
|
55
55
|
|
@@ -66,19 +66,23 @@ class QidianBrowserParser(BaseParser):
|
|
66
66
|
vec_weight=config.vec_weight,
|
67
67
|
font_debug=config.save_font_debug,
|
68
68
|
)
|
69
|
-
self._font_debug_dir = self._base_cache_dir / "font_debug"
|
69
|
+
self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
|
70
70
|
self._font_debug_dir.mkdir(parents=True, exist_ok=True)
|
71
71
|
|
72
|
-
def parse_book_info(self,
|
72
|
+
def parse_book_info(self, html_str: str) -> dict[str, Any]:
|
73
73
|
"""
|
74
74
|
Parse a book info page and extract metadata and chapter structure.
|
75
75
|
|
76
|
-
:param
|
76
|
+
:param html_str: Raw HTML of the book info page.
|
77
77
|
:return: Parsed metadata and chapter structure as a dictionary.
|
78
78
|
"""
|
79
|
-
return parse_book_info(
|
79
|
+
return parse_book_info(html_str)
|
80
80
|
|
81
|
-
def parse_chapter(
|
81
|
+
def parse_chapter(
|
82
|
+
self,
|
83
|
+
html_str: str,
|
84
|
+
chapter_id: str,
|
85
|
+
) -> ChapterDict | None:
|
82
86
|
"""
|
83
87
|
:param html: Raw HTML of the chapter page.
|
84
88
|
:param chapter_id: Identifier of the chapter being parsed.
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.session
|
4
|
+
--------------------------------------------
|
6
5
|
|
7
6
|
This package provides parsing components for handling Qidian
|
8
7
|
pages that have been rendered by a session.
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.session.chapter_encrypted
|
4
|
+
--------------------------------------------------------------
|
6
5
|
|
7
6
|
Support for parsing encrypted chapters from Qidian using font OCR mapping,
|
8
7
|
CSS rules, and custom rendering logic.
|
@@ -19,11 +18,12 @@ from __future__ import annotations
|
|
19
18
|
import json
|
20
19
|
import logging
|
21
20
|
from pathlib import Path
|
22
|
-
from typing import TYPE_CHECKING, Any
|
21
|
+
from typing import TYPE_CHECKING, Any
|
23
22
|
|
24
23
|
import tinycss2
|
25
24
|
from bs4 import BeautifulSoup, Tag
|
26
25
|
|
26
|
+
from novel_downloader.utils.chapter_storage import ChapterDict
|
27
27
|
from novel_downloader.utils.network import download_font_file
|
28
28
|
from novel_downloader.utils.text_utils import apply_font_mapping
|
29
29
|
|
@@ -40,7 +40,7 @@ if TYPE_CHECKING:
|
|
40
40
|
|
41
41
|
logger = logging.getLogger(__name__)
|
42
42
|
IGNORED_CLASS_LISTS = {"title", "review"}
|
43
|
-
_decryptor:
|
43
|
+
_decryptor: QidianNodeDecryptor | None = None
|
44
44
|
|
45
45
|
|
46
46
|
def _get_decryptor() -> QidianNodeDecryptor:
|
@@ -58,7 +58,7 @@ def parse_encrypted_chapter(
|
|
58
58
|
soup: BeautifulSoup,
|
59
59
|
chapter_id: str,
|
60
60
|
fuid: str,
|
61
|
-
) ->
|
61
|
+
) -> ChapterDict | None:
|
62
62
|
"""
|
63
63
|
Extract and return the formatted textual content of an encrypted chapter.
|
64
64
|
|
@@ -75,15 +75,15 @@ def parse_encrypted_chapter(
|
|
75
75
|
"""
|
76
76
|
try:
|
77
77
|
if not (parser._decode_font and parser._font_ocr):
|
78
|
-
return
|
78
|
+
return None
|
79
79
|
ssr_data = find_ssr_page_context(soup)
|
80
80
|
chapter_info = extract_chapter_info(ssr_data)
|
81
81
|
if not chapter_info:
|
82
82
|
logger.warning(
|
83
83
|
"[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
|
84
84
|
)
|
85
|
-
return
|
86
|
-
debug_base_dir:
|
85
|
+
return None
|
86
|
+
debug_base_dir: Path | None = None
|
87
87
|
if parser._font_debug_dir:
|
88
88
|
debug_base_dir = parser._font_debug_dir / chapter_id
|
89
89
|
debug_base_dir.mkdir(parents=True, exist_ok=True)
|
@@ -101,15 +101,12 @@ def parse_encrypted_chapter(
|
|
101
101
|
update_timestamp = chapter_info.get("updateTimestamp", 0)
|
102
102
|
modify_time = chapter_info.get("modifyTime", 0)
|
103
103
|
word_count = chapter_info.get("wordsCount", 0)
|
104
|
-
vip = bool(chapter_info.get("vipStatus", 0))
|
105
|
-
is_buy = bool(chapter_info.get("isBuy", 0))
|
106
104
|
seq = chapter_info.get("seq", None)
|
107
|
-
order = chapter_info.get("chapterOrder", None)
|
108
105
|
volume = chapter_info.get("extra", {}).get("volumeName", "")
|
109
106
|
|
110
107
|
if not raw_html:
|
111
108
|
logger.warning("[Parser] raw_html not found for chapter '%s'", chapter_id)
|
112
|
-
return
|
109
|
+
return None
|
113
110
|
|
114
111
|
# extract + save font
|
115
112
|
rf = json.loads(randomFont_str)
|
@@ -136,7 +133,7 @@ def parse_encrypted_chapter(
|
|
136
133
|
)
|
137
134
|
except Exception as e:
|
138
135
|
logger.error("[Parser] decryption failed for '%s': %s", chapter_id, e)
|
139
|
-
return
|
136
|
+
return None
|
140
137
|
main_paragraphs = extract_paragraphs_recursively(html_to_soup(raw_html))
|
141
138
|
if debug_base_dir:
|
142
139
|
main_paragraphs_path = debug_base_dir / "main_paragraphs_debug.json"
|
@@ -159,7 +156,7 @@ def parse_encrypted_chapter(
|
|
159
156
|
paragraphs_str_path.write_text(paragraphs_str, encoding="utf-8")
|
160
157
|
|
161
158
|
# Run OCR + fallback mapping
|
162
|
-
char_set =
|
159
|
+
char_set = {c for c in paragraphs_str if c not in {" ", "\n", "\u3000"}}
|
163
160
|
refl_set = set(refl_list)
|
164
161
|
char_set = char_set - refl_set
|
165
162
|
if debug_base_dir:
|
@@ -190,33 +187,31 @@ def parse_encrypted_chapter(
|
|
190
187
|
final_paragraphs_str = "\n\n".join(
|
191
188
|
line.strip() for line in original_text.splitlines() if line.strip()
|
192
189
|
)
|
193
|
-
|
190
|
+
return {
|
194
191
|
"id": str(chapter_id),
|
195
|
-
"title": title,
|
192
|
+
"title": str(title),
|
196
193
|
"content": final_paragraphs_str,
|
197
|
-
"
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
"volume": volume,
|
194
|
+
"extra": {
|
195
|
+
"author_say": author_say.strip() if author_say else "",
|
196
|
+
"updated_at": update_time,
|
197
|
+
"update_timestamp": update_timestamp,
|
198
|
+
"modify_time": modify_time,
|
199
|
+
"word_count": word_count,
|
200
|
+
"seq": seq,
|
201
|
+
"volume": volume,
|
202
|
+
},
|
207
203
|
}
|
208
|
-
return chapter_info
|
209
204
|
|
210
205
|
except Exception as e:
|
211
206
|
logger.warning(
|
212
207
|
"[Parser] parse error for encrypted chapter '%s': %s", chapter_id, e
|
213
208
|
)
|
214
|
-
return
|
209
|
+
return None
|
215
210
|
|
216
211
|
|
217
212
|
def extract_paragraphs_recursively(
|
218
213
|
soup: BeautifulSoup, chapter_id: int = -1
|
219
|
-
) ->
|
214
|
+
) -> list[dict[str, Any]]:
|
220
215
|
"""
|
221
216
|
Extracts paragraph elements under <main id="c-{chapter_id}"> from HTML
|
222
217
|
and converts them to a nested data structure for further processing.
|
@@ -227,7 +222,7 @@ def extract_paragraphs_recursively(
|
|
227
222
|
:return list: List of parsed <p> paragraph data.
|
228
223
|
"""
|
229
224
|
|
230
|
-
def parse_element(elem: Any) ->
|
225
|
+
def parse_element(elem: Any) -> dict[str, Any] | None:
|
231
226
|
if not isinstance(elem, Tag):
|
232
227
|
return None
|
233
228
|
result = {"tag": elem.name, "attrs": dict(elem.attrs), "data": []}
|
@@ -245,7 +240,7 @@ def extract_paragraphs_recursively(
|
|
245
240
|
if chapter_id > 0:
|
246
241
|
main_id = f"c-{chapter_id}"
|
247
242
|
main_tag = soup.find("main", id=main_id)
|
248
|
-
if not main_tag:
|
243
|
+
if not isinstance(main_tag, Tag):
|
249
244
|
return []
|
250
245
|
else:
|
251
246
|
main_tag = soup
|
@@ -259,7 +254,7 @@ def extract_paragraphs_recursively(
|
|
259
254
|
return result
|
260
255
|
|
261
256
|
|
262
|
-
def parse_rule(css_str: str) ->
|
257
|
+
def parse_rule(css_str: str) -> dict[str, Any]:
|
263
258
|
"""
|
264
259
|
Parse a CSS string and extract style rules for rendering.
|
265
260
|
|
@@ -274,7 +269,7 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
|
|
274
269
|
:return: Dict with "rules" and "orders" for rendering.
|
275
270
|
"""
|
276
271
|
|
277
|
-
rules:
|
272
|
+
rules: dict[str, Any] = {}
|
278
273
|
orders = []
|
279
274
|
|
280
275
|
stylesheet = tinycss2.parse_stylesheet(
|
@@ -339,8 +334,8 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
|
|
339
334
|
|
340
335
|
|
341
336
|
def render_paragraphs(
|
342
|
-
main_paragraphs:
|
343
|
-
) ->
|
337
|
+
main_paragraphs: list[dict[str, Any]], rules: dict[str, Any]
|
338
|
+
) -> tuple[str, list[str]]:
|
344
339
|
"""
|
345
340
|
Applies the parsed CSS rules to the paragraph structure and
|
346
341
|
reconstructs the visible text.
|
@@ -358,11 +353,11 @@ def render_paragraphs(
|
|
358
353
|
- A reconstructed paragraph string with line breaks.
|
359
354
|
- A list of mirrored (reflected) characters for later OCR processing.
|
360
355
|
"""
|
361
|
-
orders:
|
356
|
+
orders: list[tuple[str, str]] = rules.get("orders", [])
|
362
357
|
rules = rules.get("rules", {})
|
363
|
-
refl_list:
|
358
|
+
refl_list: list[str] = []
|
364
359
|
|
365
|
-
def apply_rule(data:
|
360
|
+
def apply_rule(data: dict[str, Any], rule: dict[str, Any]) -> str:
|
366
361
|
if rule.get("delete-all", False):
|
367
362
|
return ""
|
368
363
|
|
@@ -373,10 +368,7 @@ def render_paragraphs(
|
|
373
368
|
curr_str += first_data
|
374
369
|
|
375
370
|
if rule.get("delete-first", False):
|
376
|
-
if len(curr_str) <= 1:
|
377
|
-
curr_str = ""
|
378
|
-
else:
|
379
|
-
curr_str = curr_str[1:]
|
371
|
+
curr_str = "" if len(curr_str) <= 1 else curr_str[1:]
|
380
372
|
|
381
373
|
curr_str += rule.get("append-end-char", "")
|
382
374
|
|
@@ -433,7 +425,7 @@ def render_paragraphs(
|
|
433
425
|
logger.debug(f"[parser] not find p_class_str: {class_list}")
|
434
426
|
continue
|
435
427
|
# 普通标签处理,根据 orders 顺序匹配
|
436
|
-
for ord_selector,
|
428
|
+
for ord_selector, _ in orders:
|
437
429
|
tag_name = f"{ord_selector}"
|
438
430
|
if data.get("tag") != tag_name:
|
439
431
|
continue
|
@@ -442,7 +434,7 @@ def render_paragraphs(
|
|
442
434
|
ordered_cache[ord_selector] = apply_rule(data, curr_rule)
|
443
435
|
break
|
444
436
|
# 最后按 orders 顺序拼接
|
445
|
-
for ord_selector,
|
437
|
+
for ord_selector, _ in orders:
|
446
438
|
if ord_selector in ordered_cache:
|
447
439
|
paragraphs_str += ordered_cache[ord_selector]
|
448
440
|
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.session.chapter_normal
|
4
|
+
-----------------------------------------------------------
|
6
5
|
|
7
6
|
Provides `parse_normal_chapter`, which will:
|
8
7
|
|
@@ -12,10 +11,11 @@ Provides `parse_normal_chapter`, which will:
|
|
12
11
|
"""
|
13
12
|
|
14
13
|
import logging
|
15
|
-
from typing import Any, Dict, Optional
|
16
14
|
|
17
15
|
from bs4 import BeautifulSoup
|
18
16
|
|
17
|
+
from novel_downloader.utils.chapter_storage import ChapterDict
|
18
|
+
|
19
19
|
from ..shared import (
|
20
20
|
extract_chapter_info,
|
21
21
|
find_ssr_page_context,
|
@@ -25,7 +25,7 @@ from ..shared import (
|
|
25
25
|
from .node_decryptor import QidianNodeDecryptor
|
26
26
|
|
27
27
|
logger = logging.getLogger(__name__)
|
28
|
-
_decryptor:
|
28
|
+
_decryptor: QidianNodeDecryptor | None = None
|
29
29
|
|
30
30
|
|
31
31
|
def _get_decryptor() -> QidianNodeDecryptor:
|
@@ -42,7 +42,7 @@ def parse_normal_chapter(
|
|
42
42
|
soup: BeautifulSoup,
|
43
43
|
chapter_id: str,
|
44
44
|
fuid: str,
|
45
|
-
) ->
|
45
|
+
) -> ChapterDict | None:
|
46
46
|
"""
|
47
47
|
Extract structured chapter info from a normal Qidian page.
|
48
48
|
|
@@ -58,7 +58,7 @@ def parse_normal_chapter(
|
|
58
58
|
logger.warning(
|
59
59
|
"[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
|
60
60
|
)
|
61
|
-
return
|
61
|
+
return None
|
62
62
|
|
63
63
|
title = chapter_info.get("chapterName", "Untitled")
|
64
64
|
raw_html = chapter_info.get("content", "")
|
@@ -69,15 +69,12 @@ def parse_normal_chapter(
|
|
69
69
|
update_timestamp = chapter_info.get("updateTimestamp", 0)
|
70
70
|
modify_time = chapter_info.get("modifyTime", 0)
|
71
71
|
word_count = chapter_info.get("wordsCount", 0)
|
72
|
-
vip = bool(chapter_info.get("vipStatus", 0))
|
73
|
-
is_buy = bool(chapter_info.get("isBuy", 0))
|
74
72
|
seq = chapter_info.get("seq", None)
|
75
|
-
order = chapter_info.get("chapterOrder", None)
|
76
73
|
volume = chapter_info.get("extra", {}).get("volumeName", "")
|
77
74
|
|
78
75
|
if not raw_html:
|
79
76
|
logger.warning("[Parser] raw_html not found for chapter '%s'", chapter_id)
|
80
|
-
return
|
77
|
+
return None
|
81
78
|
|
82
79
|
if vip_status(soup):
|
83
80
|
try:
|
@@ -90,7 +87,7 @@ def parse_normal_chapter(
|
|
90
87
|
)
|
91
88
|
except Exception as e:
|
92
89
|
logger.error("[Parser] decryption failed for '%s': %s", chapter_id, e)
|
93
|
-
return
|
90
|
+
return None
|
94
91
|
|
95
92
|
paras_soup = html_to_soup(raw_html)
|
96
93
|
paras = [p.get_text(strip=True) for p in paras_soup.find_all("p")]
|
@@ -100,20 +97,19 @@ def parse_normal_chapter(
|
|
100
97
|
"id": str(chapter_id),
|
101
98
|
"title": title,
|
102
99
|
"content": chapter_text,
|
103
|
-
"
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
"volume": volume,
|
100
|
+
"extra": {
|
101
|
+
"author_say": author_say.strip() if author_say else "",
|
102
|
+
"updated_at": update_time,
|
103
|
+
"update_timestamp": update_timestamp,
|
104
|
+
"modify_time": modify_time,
|
105
|
+
"word_count": word_count,
|
106
|
+
"seq": seq,
|
107
|
+
"volume": volume,
|
108
|
+
},
|
113
109
|
}
|
114
110
|
|
115
111
|
except Exception as e:
|
116
112
|
logger.warning(
|
117
113
|
"[Parser] parse error for normal chapter '%s': %s", chapter_id, e
|
118
114
|
)
|
119
|
-
|
115
|
+
return None
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.session.chapter_router
|
4
|
+
-----------------------------------------------------------
|
6
5
|
|
7
6
|
Routing logic for selecting the correct chapter parser for Qidian session pages.
|
8
7
|
"""
|
@@ -10,7 +9,9 @@ Routing logic for selecting the correct chapter parser for Qidian session pages.
|
|
10
9
|
from __future__ import annotations
|
11
10
|
|
12
11
|
import logging
|
13
|
-
from typing import TYPE_CHECKING
|
12
|
+
from typing import TYPE_CHECKING
|
13
|
+
|
14
|
+
from novel_downloader.utils.chapter_storage import ChapterDict
|
14
15
|
|
15
16
|
from ..shared import (
|
16
17
|
can_view_chapter,
|
@@ -29,7 +30,7 @@ def parse_chapter(
|
|
29
30
|
parser: QidianSessionParser,
|
30
31
|
html_str: str,
|
31
32
|
chapter_id: str,
|
32
|
-
) ->
|
33
|
+
) -> ChapterDict | None:
|
33
34
|
"""
|
34
35
|
Extract and return the formatted textual content of chapter.
|
35
36
|
|
@@ -45,11 +46,11 @@ def parse_chapter(
|
|
45
46
|
logger.warning(
|
46
47
|
"[Parser] Chapter '%s' is not purchased or inaccessible.", chapter_id
|
47
48
|
)
|
48
|
-
return
|
49
|
+
return None
|
49
50
|
|
50
51
|
if is_encrypted(soup):
|
51
52
|
if not parser._decode_font:
|
52
|
-
return
|
53
|
+
return None
|
53
54
|
try:
|
54
55
|
from .chapter_encrypted import parse_encrypted_chapter
|
55
56
|
|
@@ -59,9 +60,9 @@ def parse_chapter(
|
|
59
60
|
"[Parser] Encrypted chapter '%s' requires extra dependencies.",
|
60
61
|
chapter_id,
|
61
62
|
)
|
62
|
-
return
|
63
|
+
return None
|
63
64
|
|
64
65
|
return parse_normal_chapter(soup, chapter_id, parser._fuid)
|
65
66
|
except Exception as e:
|
66
67
|
logger.warning("[Parser] parse error for chapter '%s': %s", chapter_id, e)
|
67
|
-
|
68
|
+
return None
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.session.main_parser
|
4
|
+
--------------------------------------------------------
|
6
5
|
|
7
6
|
Main parser class for handling Qidian chapters rendered via a session.
|
8
7
|
|
@@ -13,10 +12,11 @@ content extracted from dynamically rendered Qidian HTML pages.
|
|
13
12
|
from __future__ import annotations
|
14
13
|
|
15
14
|
from pathlib import Path
|
16
|
-
from typing import TYPE_CHECKING, Any
|
15
|
+
from typing import TYPE_CHECKING, Any
|
17
16
|
|
18
17
|
from novel_downloader.config.models import ParserConfig
|
19
|
-
from novel_downloader.core.parsers.
|
18
|
+
from novel_downloader.core.parsers.base import BaseParser
|
19
|
+
from novel_downloader.utils.chapter_storage import ChapterDict
|
20
20
|
from novel_downloader.utils.state import state_mgr
|
21
21
|
|
22
22
|
from ..shared import (
|
@@ -48,12 +48,12 @@ class QidianSessionParser(BaseParser):
|
|
48
48
|
|
49
49
|
self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
|
50
50
|
self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
|
51
|
-
self._font_debug_dir:
|
51
|
+
self._font_debug_dir: Path | None = None
|
52
52
|
|
53
53
|
qd_cookies = state_mgr.get_cookies("qidian")
|
54
54
|
self._fuid: str = qd_cookies.get("ywguid", "")
|
55
55
|
|
56
|
-
self._font_ocr:
|
56
|
+
self._font_ocr: FontOCR | None = None
|
57
57
|
if self._decode_font:
|
58
58
|
from novel_downloader.utils.fontocr import FontOCR
|
59
59
|
|
@@ -69,19 +69,23 @@ class QidianSessionParser(BaseParser):
|
|
69
69
|
vec_weight=config.vec_weight,
|
70
70
|
font_debug=config.save_font_debug,
|
71
71
|
)
|
72
|
-
self._font_debug_dir = self._base_cache_dir / "font_debug"
|
72
|
+
self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
|
73
73
|
self._font_debug_dir.mkdir(parents=True, exist_ok=True)
|
74
74
|
|
75
|
-
def parse_book_info(self,
|
75
|
+
def parse_book_info(self, html_str: str) -> dict[str, Any]:
|
76
76
|
"""
|
77
77
|
Parse a book info page and extract metadata and chapter structure.
|
78
78
|
|
79
|
-
:param
|
79
|
+
:param html_str: Raw HTML of the book info page.
|
80
80
|
:return: Parsed metadata and chapter structure as a dictionary.
|
81
81
|
"""
|
82
|
-
return parse_book_info(
|
82
|
+
return parse_book_info(html_str)
|
83
83
|
|
84
|
-
def parse_chapter(
|
84
|
+
def parse_chapter(
|
85
|
+
self,
|
86
|
+
html_str: str,
|
87
|
+
chapter_id: str,
|
88
|
+
) -> ChapterDict | None:
|
85
89
|
"""
|
86
90
|
:param html: Raw HTML of the chapter page.
|
87
91
|
:param chapter_id: Identifier of the chapter being parsed.
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.session.node_decryptor
|
4
|
+
-----------------------------------------------------------
|
6
5
|
|
7
6
|
Provides QidianNodeDecryptor, which ensures a Node.js environment,
|
8
7
|
downloads or installs the required JS modules (Fock + decrypt script),
|
@@ -15,7 +14,6 @@ import shutil
|
|
15
14
|
import subprocess
|
16
15
|
import uuid
|
17
16
|
from pathlib import Path
|
18
|
-
from typing import Union
|
19
17
|
|
20
18
|
from novel_downloader.utils.constants import (
|
21
19
|
JS_SCRIPT_DIR,
|
@@ -68,7 +66,7 @@ class QidianNodeDecryptor:
|
|
68
66
|
"""
|
69
67
|
# 1) Check Node.js
|
70
68
|
if not shutil.which("node"):
|
71
|
-
raise
|
69
|
+
raise OSError("Node.js is not installed or not in PATH.")
|
72
70
|
|
73
71
|
# 2) Copy bundled decrypt script into place if missing
|
74
72
|
if not self.QIDIAN_DECRYPT_SCRIPT_PATH.exists():
|
@@ -102,8 +100,8 @@ class QidianNodeDecryptor:
|
|
102
100
|
|
103
101
|
def decrypt(
|
104
102
|
self,
|
105
|
-
ciphertext:
|
106
|
-
chapter_id:
|
103
|
+
ciphertext: str | bytes,
|
104
|
+
chapter_id: str | int,
|
107
105
|
fkp: str,
|
108
106
|
fuid: str,
|
109
107
|
) -> str:
|
@@ -120,7 +118,7 @@ class QidianNodeDecryptor:
|
|
120
118
|
# Normalize inputs
|
121
119
|
cipher_str = (
|
122
120
|
ciphertext.decode("utf-8")
|
123
|
-
if isinstance(ciphertext, (bytes
|
121
|
+
if isinstance(ciphertext, (bytes | bytearray))
|
124
122
|
else str(ciphertext)
|
125
123
|
)
|
126
124
|
chapter_str = str(chapter_id)
|
@@ -146,8 +144,7 @@ class QidianNodeDecryptor:
|
|
146
144
|
|
147
145
|
proc = subprocess.run(
|
148
146
|
["node", self.script_path.name, input_path.name, output_path.name],
|
149
|
-
|
150
|
-
stderr=subprocess.PIPE,
|
147
|
+
capture_output=True,
|
151
148
|
text=True,
|
152
149
|
cwd=str(self.script_dir),
|
153
150
|
)
|
@@ -1,8 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
2
|
"""
|
4
|
-
novel_downloader.core.parsers.
|
5
|
-
|
3
|
+
novel_downloader.core.parsers.qidian.shared
|
4
|
+
-------------------------------------------
|
6
5
|
|
7
6
|
Shared parsing utilities for Qidian parser components.
|
8
7
|
|