novel-downloader 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -39
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
- novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
- novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +178 -0
- novel_downloader/core/fetchers/linovelib/session.py +178 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +11 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +2 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +69 -35
- novel_downloader-1.4.0.dist-info/RECORD +170 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -210
- novel_downloader/core/downloaders/common/common_sync.py +0 -202
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.3.dist-info/RECORD +0 -166
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.interfaces.downloader
|
4
|
+
-------------------------------------------
|
5
|
+
|
6
|
+
This module defines the DownloaderProtocol, a structural interface
|
7
|
+
that outlines the expected behavior of any downloader class.
|
8
|
+
"""
|
9
|
+
|
10
|
+
from collections.abc import Awaitable, Callable
|
11
|
+
from typing import Any, Protocol, runtime_checkable
|
12
|
+
|
13
|
+
|
14
|
+
@runtime_checkable
|
15
|
+
class DownloaderProtocol(Protocol):
|
16
|
+
"""
|
17
|
+
Protocol for fully-asynchronous downloader classes.
|
18
|
+
|
19
|
+
Defines the expected interface for any downloader implementation,
|
20
|
+
including both batch and single book downloads,
|
21
|
+
as well as optional progress hooks.
|
22
|
+
"""
|
23
|
+
|
24
|
+
async def download(
|
25
|
+
self,
|
26
|
+
book_id: str,
|
27
|
+
*,
|
28
|
+
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
29
|
+
**kwargs: Any,
|
30
|
+
) -> None:
|
31
|
+
"""
|
32
|
+
Download logic for a single book.
|
33
|
+
|
34
|
+
:param book_id: The identifier of the book.
|
35
|
+
:param progress_hook: (optional) Called after each chapter;
|
36
|
+
args: completed_count, total_count.
|
37
|
+
"""
|
38
|
+
...
|
39
|
+
|
40
|
+
async def download_many(
|
41
|
+
self,
|
42
|
+
book_ids: list[str],
|
43
|
+
*,
|
44
|
+
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
45
|
+
**kwargs: Any,
|
46
|
+
) -> None:
|
47
|
+
"""
|
48
|
+
Batch download entry point.
|
49
|
+
|
50
|
+
:param book_ids: List of book IDs to download.
|
51
|
+
:param progress_hook: (optional) Called after each chapter;
|
52
|
+
args: completed_count, total_count.
|
53
|
+
"""
|
54
|
+
...
|
@@ -1,9 +1,9 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.interfaces.
|
4
|
-
|
3
|
+
novel_downloader.core.interfaces.exporter
|
4
|
+
-----------------------------------------
|
5
5
|
|
6
|
-
Defines the
|
6
|
+
Defines the ExporterProtocol interface for persisting completed books in
|
7
7
|
TXT, EPUB, Markdown, and PDF formats.
|
8
8
|
"""
|
9
9
|
|
@@ -11,23 +11,23 @@ from typing import Protocol, runtime_checkable
|
|
11
11
|
|
12
12
|
|
13
13
|
@runtime_checkable
|
14
|
-
class
|
14
|
+
class ExporterProtocol(Protocol):
|
15
15
|
"""
|
16
|
-
A
|
16
|
+
An exporter must implement a method to persist a completed book as plain text.
|
17
17
|
|
18
|
-
It may also optionally implement an EPUB (or other format)
|
18
|
+
It may also optionally implement an EPUB (or other format) exporter.
|
19
19
|
"""
|
20
20
|
|
21
|
-
def
|
21
|
+
def export(self, book_id: str) -> None:
|
22
22
|
"""
|
23
|
-
|
23
|
+
Export the book in the formats specified in config.
|
24
24
|
If a method is not implemented or fails, log the error and continue.
|
25
25
|
|
26
26
|
:param book_id: The book identifier (used for filename, lookup, etc.)
|
27
27
|
"""
|
28
28
|
...
|
29
29
|
|
30
|
-
def
|
30
|
+
def export_as_txt(self, book_id: str) -> None:
|
31
31
|
"""
|
32
32
|
Persist the assembled book as a .txt file.
|
33
33
|
|
@@ -35,7 +35,7 @@ class SaverProtocol(Protocol):
|
|
35
35
|
"""
|
36
36
|
...
|
37
37
|
|
38
|
-
def
|
38
|
+
def export_as_epub(self, book_id: str) -> None:
|
39
39
|
"""
|
40
40
|
Optional: Persist the assembled book as an .epub file.
|
41
41
|
|
@@ -43,7 +43,7 @@ class SaverProtocol(Protocol):
|
|
43
43
|
"""
|
44
44
|
...
|
45
45
|
|
46
|
-
def
|
46
|
+
def export_as_md(self, book_id: str) -> None:
|
47
47
|
"""
|
48
48
|
Optional: Persist the assembled book as a Markdown (.md) file.
|
49
49
|
|
@@ -51,7 +51,7 @@ class SaverProtocol(Protocol):
|
|
51
51
|
"""
|
52
52
|
...
|
53
53
|
|
54
|
-
def
|
54
|
+
def export_as_pdf(self, book_id: str) -> None:
|
55
55
|
"""
|
56
56
|
Optional: Persist the assembled book as a PDF file.
|
57
57
|
|
@@ -0,0 +1,162 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.interfaces.fetcher
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
Defines the Async FetcherProtocol interface for fetching raw HTML or JSON
|
7
|
+
for book info pages and individual chapters, and for managing the request lifecycle.
|
8
|
+
"""
|
9
|
+
|
10
|
+
import types
|
11
|
+
from typing import Any, Protocol, Self, runtime_checkable
|
12
|
+
|
13
|
+
from novel_downloader.models import LoginField
|
14
|
+
|
15
|
+
|
16
|
+
@runtime_checkable
|
17
|
+
class FetcherProtocol(Protocol):
|
18
|
+
"""
|
19
|
+
An async requester must be able to fetch raw HTML/data for:
|
20
|
+
- a book's info page,
|
21
|
+
- a specific chapter page,
|
22
|
+
and manage login/shutdown asynchronously.
|
23
|
+
"""
|
24
|
+
|
25
|
+
async def login(
|
26
|
+
self,
|
27
|
+
username: str = "",
|
28
|
+
password: str = "",
|
29
|
+
cookies: dict[str, str] | None = None,
|
30
|
+
attempt: int = 1,
|
31
|
+
**kwargs: Any,
|
32
|
+
) -> bool:
|
33
|
+
"""
|
34
|
+
Attempt to log in asynchronously.
|
35
|
+
:returns: True if login succeeded.
|
36
|
+
"""
|
37
|
+
...
|
38
|
+
|
39
|
+
async def get_book_info(
|
40
|
+
self,
|
41
|
+
book_id: str,
|
42
|
+
**kwargs: Any,
|
43
|
+
) -> list[str]:
|
44
|
+
"""
|
45
|
+
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
46
|
+
|
47
|
+
:param book_id: The book identifier.
|
48
|
+
:return: The page contents as a list of strings.
|
49
|
+
"""
|
50
|
+
...
|
51
|
+
|
52
|
+
async def get_book_chapter(
|
53
|
+
self,
|
54
|
+
book_id: str,
|
55
|
+
chapter_id: str,
|
56
|
+
**kwargs: Any,
|
57
|
+
) -> list[str]:
|
58
|
+
"""
|
59
|
+
Fetch the raw HTML (or JSON) of a single chapter asynchronously.
|
60
|
+
|
61
|
+
:param book_id: The book identifier.
|
62
|
+
:param chapter_id: The chapter identifier.
|
63
|
+
:return: The chapter content as a list of strings.
|
64
|
+
"""
|
65
|
+
...
|
66
|
+
|
67
|
+
async def get_bookcase(
|
68
|
+
self,
|
69
|
+
**kwargs: Any,
|
70
|
+
) -> list[str]:
|
71
|
+
"""
|
72
|
+
Optional: Retrieve the HTML content of the authenticated
|
73
|
+
user's bookcase page asynchronously.
|
74
|
+
|
75
|
+
:return: The HTML markup of the bookcase page(s) as a list of strings.
|
76
|
+
"""
|
77
|
+
...
|
78
|
+
|
79
|
+
async def fetch(
|
80
|
+
self,
|
81
|
+
url: str,
|
82
|
+
**kwargs: Any,
|
83
|
+
) -> str:
|
84
|
+
"""
|
85
|
+
Perform a generic HTTP request and return the response body as text.
|
86
|
+
|
87
|
+
:param url: The URL to request.
|
88
|
+
:return: The response content as a string (HTML or JSON or plain text).
|
89
|
+
"""
|
90
|
+
...
|
91
|
+
|
92
|
+
async def init(
|
93
|
+
self,
|
94
|
+
**kwargs: Any,
|
95
|
+
) -> None:
|
96
|
+
"""
|
97
|
+
Perform async initialization, such as
|
98
|
+
launching a browser or creating a session.
|
99
|
+
|
100
|
+
This should be called before using any other method
|
101
|
+
if initialization is required.
|
102
|
+
"""
|
103
|
+
...
|
104
|
+
|
105
|
+
async def close(self) -> None:
|
106
|
+
"""
|
107
|
+
Shutdown and clean up any resources.
|
108
|
+
"""
|
109
|
+
...
|
110
|
+
|
111
|
+
async def load_state(self) -> bool:
|
112
|
+
"""
|
113
|
+
Restore session state from a persistent storage,
|
114
|
+
allowing the requester to resume a previous authenticated session.
|
115
|
+
|
116
|
+
:return: True if the session state was successfully loaded and applied.
|
117
|
+
"""
|
118
|
+
...
|
119
|
+
|
120
|
+
async def save_state(self) -> bool:
|
121
|
+
"""
|
122
|
+
Persist the current session state to a file
|
123
|
+
or other storage, so that it can be restored in future sessions.
|
124
|
+
|
125
|
+
:return: True if the session state was successfully saved.
|
126
|
+
"""
|
127
|
+
...
|
128
|
+
|
129
|
+
async def set_interactive_mode(self, enable: bool) -> bool:
|
130
|
+
"""
|
131
|
+
Enable or disable interactive mode for manual login.
|
132
|
+
|
133
|
+
:param enable: True to enable, False to disable interactive mode.
|
134
|
+
:return: True if operation or login check succeeded, False otherwise.
|
135
|
+
"""
|
136
|
+
...
|
137
|
+
|
138
|
+
@property
|
139
|
+
def requester_type(self) -> str:
|
140
|
+
...
|
141
|
+
|
142
|
+
@property
|
143
|
+
def is_logged_in(self) -> bool:
|
144
|
+
"""
|
145
|
+
Indicates whether the requester is currently authenticated.
|
146
|
+
"""
|
147
|
+
...
|
148
|
+
|
149
|
+
@property
|
150
|
+
def login_fields(self) -> list[LoginField]:
|
151
|
+
...
|
152
|
+
|
153
|
+
async def __aexit__(
|
154
|
+
self,
|
155
|
+
exc_type: type[BaseException] | None,
|
156
|
+
exc_val: BaseException | None,
|
157
|
+
tb: types.TracebackType | None,
|
158
|
+
) -> None:
|
159
|
+
...
|
160
|
+
|
161
|
+
async def __aenter__(self) -> Self:
|
162
|
+
...
|
@@ -9,7 +9,7 @@ parsing individual chapter content, and setting parser context via book_id.
|
|
9
9
|
|
10
10
|
from typing import Any, Protocol, runtime_checkable
|
11
11
|
|
12
|
-
from novel_downloader.
|
12
|
+
from novel_downloader.models import ChapterDict
|
13
13
|
|
14
14
|
|
15
15
|
@runtime_checkable
|
@@ -17,33 +17,32 @@ class ParserProtocol(Protocol):
|
|
17
17
|
"""
|
18
18
|
A parser must be able to:
|
19
19
|
- extract book metadata from an HTML string,
|
20
|
-
- extract a single chapter's text from an HTML string
|
21
|
-
- accept a book_id context for multi-step workflows.
|
20
|
+
- extract a single chapter's text from an HTML string
|
22
21
|
"""
|
23
22
|
|
24
23
|
def parse_book_info(
|
25
24
|
self,
|
26
|
-
|
25
|
+
html_list: list[str],
|
27
26
|
**kwargs: Any,
|
28
27
|
) -> dict[str, Any]:
|
29
28
|
"""
|
30
29
|
Parse and return a dictionary of book information from the raw HTML.
|
31
30
|
|
32
|
-
:param
|
31
|
+
:param html_list: The HTML list of a book's info pages.
|
33
32
|
:return: A dict containing metadata like title, author, chapters list, etc.
|
34
33
|
"""
|
35
34
|
...
|
36
35
|
|
37
36
|
def parse_chapter(
|
38
37
|
self,
|
39
|
-
|
38
|
+
html_list: list[str],
|
40
39
|
chapter_id: str,
|
41
40
|
**kwargs: Any,
|
42
41
|
) -> ChapterDict | None:
|
43
42
|
"""
|
44
43
|
Parse and return the text content of one chapter.
|
45
44
|
|
46
|
-
:param
|
45
|
+
:param html_list: The HTML list of the chapter pages.
|
47
46
|
:param chapter_id: Identifier of the chapter being parsed.
|
48
47
|
:return: The chapter's text.
|
49
48
|
"""
|
@@ -9,6 +9,7 @@ for the novel_downloader framework.
|
|
9
9
|
Modules:
|
10
10
|
- biquge (笔趣阁)
|
11
11
|
- esjzone (ESJ Zone)
|
12
|
+
- linovelib (哔哩轻小说)
|
12
13
|
- qianbi (铅笔小说)
|
13
14
|
- qidian (起点中文网)
|
14
15
|
- sfacg (SF轻小说)
|
@@ -19,11 +20,9 @@ Modules:
|
|
19
20
|
from .biquge import BiqugeParser
|
20
21
|
from .common import CommonParser
|
21
22
|
from .esjzone import EsjzoneParser
|
23
|
+
from .linovelib import LinovelibParser
|
22
24
|
from .qianbi import QianbiParser
|
23
|
-
from .qidian import
|
24
|
-
QidianBrowserParser,
|
25
|
-
QidianSessionParser,
|
26
|
-
)
|
25
|
+
from .qidian import QidianParser
|
27
26
|
from .sfacg import SfacgParser
|
28
27
|
from .yamibo import YamiboParser
|
29
28
|
|
@@ -31,9 +30,9 @@ __all__ = [
|
|
31
30
|
"BiqugeParser",
|
32
31
|
"CommonParser",
|
33
32
|
"EsjzoneParser",
|
33
|
+
"LinovelibParser",
|
34
34
|
"QianbiParser",
|
35
|
-
"
|
36
|
-
"QidianSessionParser",
|
35
|
+
"QidianParser",
|
37
36
|
"SfacgParser",
|
38
37
|
"YamiboParser",
|
39
38
|
]
|
@@ -17,9 +17,8 @@ import abc
|
|
17
17
|
from pathlib import Path
|
18
18
|
from typing import Any
|
19
19
|
|
20
|
-
from novel_downloader.config import ParserConfig
|
21
20
|
from novel_downloader.core.interfaces import ParserProtocol
|
22
|
-
from novel_downloader.
|
21
|
+
from novel_downloader.models import ChapterDict, ParserConfig
|
23
22
|
|
24
23
|
|
25
24
|
class BaseParser(ParserProtocol, abc.ABC):
|
@@ -51,33 +50,30 @@ class BaseParser(ParserProtocol, abc.ABC):
|
|
51
50
|
@abc.abstractmethod
|
52
51
|
def parse_book_info(
|
53
52
|
self,
|
54
|
-
|
53
|
+
html_list: list[str],
|
55
54
|
**kwargs: Any,
|
56
55
|
) -> dict[str, Any]:
|
57
56
|
"""
|
58
|
-
Parse a
|
57
|
+
Parse and return a dictionary of book information from the raw HTML.
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
:param html_str: Raw HTML of the book info page.
|
64
|
-
:return: Parsed metadata and chapter structure as a dictionary.
|
59
|
+
:param html_list: The HTML list of a book's info pages.
|
60
|
+
:return: A dict containing metadata like title, author, chapters list, etc.
|
65
61
|
"""
|
66
62
|
...
|
67
63
|
|
68
64
|
@abc.abstractmethod
|
69
65
|
def parse_chapter(
|
70
66
|
self,
|
71
|
-
|
67
|
+
html_list: list[str],
|
72
68
|
chapter_id: str,
|
73
69
|
**kwargs: Any,
|
74
70
|
) -> ChapterDict | None:
|
75
71
|
"""
|
76
|
-
Parse
|
72
|
+
Parse and return the text content of one chapter.
|
77
73
|
|
78
|
-
:param
|
74
|
+
:param html_list: The HTML list of the chapter pages.
|
79
75
|
:param chapter_id: Identifier of the chapter being parsed.
|
80
|
-
:return:
|
76
|
+
:return: The chapter's text.
|
81
77
|
"""
|
82
78
|
...
|
83
79
|
|
@@ -8,11 +8,10 @@ novel_downloader.core.parsers.biquge.main_parser
|
|
8
8
|
import re
|
9
9
|
from typing import Any
|
10
10
|
|
11
|
-
from lxml import
|
12
|
-
from lxml.etree import _Element
|
11
|
+
from lxml import html
|
13
12
|
|
14
13
|
from novel_downloader.core.parsers.base import BaseParser
|
15
|
-
from novel_downloader.
|
14
|
+
from novel_downloader.models import ChapterDict
|
16
15
|
|
17
16
|
|
18
17
|
class BiqugeParser(BaseParser):
|
@@ -20,21 +19,21 @@ class BiqugeParser(BaseParser):
|
|
20
19
|
|
21
20
|
def parse_book_info(
|
22
21
|
self,
|
23
|
-
|
22
|
+
html_list: list[str],
|
24
23
|
**kwargs: Any,
|
25
24
|
) -> dict[str, Any]:
|
26
25
|
"""
|
27
26
|
Parse a book info page and extract metadata and chapter structure.
|
28
27
|
|
29
|
-
:param
|
28
|
+
:param html_list: Raw HTML of the book info page.
|
30
29
|
:return: Parsed metadata and chapter structure as a dictionary.
|
31
30
|
"""
|
32
|
-
if not
|
31
|
+
if not html_list:
|
33
32
|
return {}
|
34
|
-
tree =
|
33
|
+
tree = html.fromstring(html_list[0])
|
35
34
|
result: dict[str, Any] = {}
|
36
35
|
|
37
|
-
def extract_text(elem:
|
36
|
+
def extract_text(elem: html.HtmlElement | None) -> str:
|
38
37
|
if elem is None:
|
39
38
|
return ""
|
40
39
|
return "".join(elem.itertext(tag=None)).strip()
|
@@ -79,7 +78,7 @@ class BiqugeParser(BaseParser):
|
|
79
78
|
text = "".join(elem.itertext()).strip()
|
80
79
|
in_main_volume = "正文" in text
|
81
80
|
elif in_main_volume and elem.tag == "dd":
|
82
|
-
a: list[
|
81
|
+
a: list[html.HtmlElement] = elem.xpath("./a")
|
83
82
|
if a:
|
84
83
|
title = "".join(a[0].itertext(tag=None)).strip()
|
85
84
|
url = a[0].get("href", "").strip()
|
@@ -96,20 +95,20 @@ class BiqugeParser(BaseParser):
|
|
96
95
|
|
97
96
|
def parse_chapter(
|
98
97
|
self,
|
99
|
-
|
98
|
+
html_list: list[str],
|
100
99
|
chapter_id: str,
|
101
100
|
**kwargs: Any,
|
102
101
|
) -> ChapterDict | None:
|
103
102
|
"""
|
104
103
|
Parse a single chapter page and extract clean text or simplified HTML.
|
105
104
|
|
106
|
-
:param
|
105
|
+
:param html_list: Raw HTML of the chapter page.
|
107
106
|
:param chapter_id: Identifier of the chapter being parsed.
|
108
107
|
:return: Cleaned chapter content as plain text or minimal HTML.
|
109
108
|
"""
|
110
|
-
if not
|
109
|
+
if not html_list:
|
111
110
|
return None
|
112
|
-
tree =
|
111
|
+
tree = html.fromstring(html_list[0], parser=None)
|
113
112
|
|
114
113
|
# 提取标题
|
115
114
|
title_elem = tree.xpath('//div[@class="bookname"]/h1')
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.parsers.common.
|
4
|
-
|
3
|
+
novel_downloader.core.parsers.common.helper
|
4
|
+
-------------------------------------------
|
5
5
|
|
6
6
|
Shared utility functions for parsing Common pages.
|
7
7
|
"""
|
@@ -13,7 +13,7 @@ from typing import Any, cast
|
|
13
13
|
|
14
14
|
from bs4 import BeautifulSoup, Tag
|
15
15
|
|
16
|
-
from novel_downloader.
|
16
|
+
from novel_downloader.models import (
|
17
17
|
BookInfoRules,
|
18
18
|
FieldRules,
|
19
19
|
RuleStep,
|
@@ -9,11 +9,14 @@ Common pages.
|
|
9
9
|
|
10
10
|
from typing import Any
|
11
11
|
|
12
|
-
from novel_downloader.config import ParserConfig, SiteRules
|
13
12
|
from novel_downloader.core.parsers.base import BaseParser
|
14
|
-
from novel_downloader.
|
13
|
+
from novel_downloader.models import (
|
14
|
+
ChapterDict,
|
15
|
+
ParserConfig,
|
16
|
+
SiteRules,
|
17
|
+
)
|
15
18
|
|
16
|
-
from .helper import HTMLExtractor
|
19
|
+
# from .helper import HTMLExtractor
|
17
20
|
|
18
21
|
|
19
22
|
class CommonParser(BaseParser):
|
@@ -37,58 +40,60 @@ class CommonParser(BaseParser):
|
|
37
40
|
|
38
41
|
def parse_book_info(
|
39
42
|
self,
|
40
|
-
|
43
|
+
html_list: list[str],
|
41
44
|
**kwargs: Any,
|
42
45
|
) -> dict[str, Any]:
|
43
46
|
"""
|
44
47
|
Parse a book info page and extract metadata and chapter structure.
|
45
48
|
|
46
|
-
:param
|
49
|
+
:param html_list: Raw HTML of the book info page.
|
47
50
|
:return: Parsed metadata and chapter structure as a dictionary.
|
48
51
|
"""
|
49
|
-
if not
|
52
|
+
if not html_list:
|
50
53
|
return {}
|
51
|
-
extractor = HTMLExtractor(
|
52
|
-
rules = self._site_rule["book_info"]
|
53
|
-
return extractor.extract_book_info(rules)
|
54
|
+
# extractor = HTMLExtractor(html_list[0])
|
55
|
+
# rules = self._site_rule["book_info"]
|
56
|
+
# return extractor.extract_book_info(rules)
|
57
|
+
return {}
|
54
58
|
|
55
59
|
def parse_chapter(
|
56
60
|
self,
|
57
|
-
|
61
|
+
html_list: list[str],
|
58
62
|
chapter_id: str,
|
59
63
|
**kwargs: Any,
|
60
64
|
) -> ChapterDict | None:
|
61
65
|
"""
|
62
66
|
Parse a single chapter page and extract clean text or simplified HTML.
|
63
67
|
|
64
|
-
:param
|
68
|
+
:param html_list: Raw HTML of the chapter page.
|
65
69
|
:param chapter_id: Identifier of the chapter being parsed.
|
66
70
|
:return: Cleaned chapter content as plain text or minimal HTML.
|
67
71
|
"""
|
68
|
-
if not
|
72
|
+
if not html_list:
|
69
73
|
return None
|
70
|
-
extractor = HTMLExtractor(
|
71
|
-
chapter_rules = self._site_rule["chapter"]
|
72
|
-
|
73
|
-
# 必须有正文内容
|
74
|
-
content_steps = chapter_rules.get("content")
|
75
|
-
if not content_steps:
|
76
|
-
|
77
|
-
|
78
|
-
title_steps = chapter_rules.get("title")
|
79
|
-
title = extractor.extract_field(title_steps["steps"]) if title_steps else ""
|
80
|
-
content = extractor.extract_field(content_steps["steps"])
|
81
|
-
if not content:
|
82
|
-
|
83
|
-
|
84
|
-
return {
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
}
|
74
|
+
# extractor = HTMLExtractor(html_list[0])
|
75
|
+
# chapter_rules = self._site_rule["chapter"]
|
76
|
+
|
77
|
+
# # 必须有正文内容
|
78
|
+
# content_steps = chapter_rules.get("content")
|
79
|
+
# if not content_steps:
|
80
|
+
# raise ValueError(f"No chapter content steps for site: {self._site}")
|
81
|
+
|
82
|
+
# title_steps = chapter_rules.get("title")
|
83
|
+
# title = extractor.extract_field(title_steps["steps"]) if title_steps else ""
|
84
|
+
# content = extractor.extract_field(content_steps["steps"])
|
85
|
+
# if not content:
|
86
|
+
# return None
|
87
|
+
|
88
|
+
# return {
|
89
|
+
# "id": chapter_id,
|
90
|
+
# "title": title or "Untitled",
|
91
|
+
# "content": content,
|
92
|
+
# "extra": {
|
93
|
+
# "site": self._site,
|
94
|
+
# },
|
95
|
+
# }
|
96
|
+
return None
|
92
97
|
|
93
98
|
@property
|
94
99
|
def site(self) -> str:
|