novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +79 -66
- novel_downloader/cli/export.py +17 -21
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +206 -209
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +5 -5
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +17 -12
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +20 -14
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +6 -19
- novel_downloader/core/interfaces/parser.py +7 -8
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +64 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +64 -69
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/main_parser.py +756 -48
- novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +429 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +34 -85
- novel_downloader/locales/zh.json +35 -86
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -24
- novel_downloader/utils/chapter_storage.py +5 -5
- novel_downloader/utils/constants.py +4 -31
- novel_downloader/utils/cookies.py +38 -35
- novel_downloader/utils/crypto_utils/__init__.py +7 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/crypto_utils/rc4.py +54 -0
- novel_downloader/utils/epub/__init__.py +3 -4
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +62 -21
- novel_downloader/utils/epub/documents.py +95 -201
- novel_downloader/utils/epub/models.py +8 -22
- novel_downloader/utils/epub/utils.py +73 -106
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +53 -188
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -15
- novel_downloader/utils/fontocr/__init__.py +5 -14
- novel_downloader/utils/fontocr/core.py +216 -0
- novel_downloader/utils/fontocr/loader.py +50 -0
- novel_downloader/utils/logger.py +81 -65
- novel_downloader/utils/network.py +17 -41
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +55 -49
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.1.dist-info/METADATA +172 -0
- novel_downloader-2.0.1.dist-info/RECORD +206 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/crypto_utils.py +0 -71
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -3,10 +3,10 @@
|
|
3
3
|
novel_downloader.core.interfaces.exporter
|
4
4
|
-----------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
TXT, EPUB, Markdown, and PDF formats.
|
6
|
+
Protocol defining the interface for exporting books to text, EPUB, and other formats.
|
8
7
|
"""
|
9
8
|
|
9
|
+
from pathlib import Path
|
10
10
|
from typing import Protocol, runtime_checkable
|
11
11
|
|
12
12
|
|
@@ -18,7 +18,7 @@ class ExporterProtocol(Protocol):
|
|
18
18
|
It may also optionally implement an EPUB (or other format) exporter.
|
19
19
|
"""
|
20
20
|
|
21
|
-
def export(self, book_id: str) ->
|
21
|
+
def export(self, book_id: str) -> dict[str, Path]:
|
22
22
|
"""
|
23
23
|
Export the book in the formats specified in config.
|
24
24
|
If a method is not implemented or fails, log the error and continue.
|
@@ -27,7 +27,7 @@ class ExporterProtocol(Protocol):
|
|
27
27
|
"""
|
28
28
|
...
|
29
29
|
|
30
|
-
def export_as_txt(self, book_id: str) -> None:
|
30
|
+
def export_as_txt(self, book_id: str) -> Path | None:
|
31
31
|
"""
|
32
32
|
Persist the assembled book as a .txt file.
|
33
33
|
|
@@ -35,7 +35,7 @@ class ExporterProtocol(Protocol):
|
|
35
35
|
"""
|
36
36
|
...
|
37
37
|
|
38
|
-
def export_as_epub(self, book_id: str) -> None:
|
38
|
+
def export_as_epub(self, book_id: str) -> Path | None:
|
39
39
|
"""
|
40
40
|
Optional: Persist the assembled book as an .epub file.
|
41
41
|
|
@@ -43,7 +43,7 @@ class ExporterProtocol(Protocol):
|
|
43
43
|
"""
|
44
44
|
...
|
45
45
|
|
46
|
-
def export_as_md(self, book_id: str) -> None:
|
46
|
+
def export_as_md(self, book_id: str) -> Path | None:
|
47
47
|
"""
|
48
48
|
Optional: Persist the assembled book as a Markdown (.md) file.
|
49
49
|
|
@@ -51,7 +51,7 @@ class ExporterProtocol(Protocol):
|
|
51
51
|
"""
|
52
52
|
...
|
53
53
|
|
54
|
-
def export_as_pdf(self, book_id: str) -> None:
|
54
|
+
def export_as_pdf(self, book_id: str) -> Path | None:
|
55
55
|
"""
|
56
56
|
Optional: Persist the assembled book as a PDF file.
|
57
57
|
|
@@ -3,8 +3,7 @@
|
|
3
3
|
novel_downloader.core.interfaces.fetcher
|
4
4
|
----------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
for book info pages, individual chapters, managing request lifecycle
|
6
|
+
Protocol defining the interface for asynchronous fetching, login, and session management
|
8
7
|
"""
|
9
8
|
|
10
9
|
import types
|
@@ -17,8 +16,8 @@ from novel_downloader.models import LoginField
|
|
17
16
|
class FetcherProtocol(Protocol):
|
18
17
|
"""
|
19
18
|
An async requester must be able to fetch raw HTML/data for:
|
20
|
-
|
21
|
-
|
19
|
+
* a book's info page,
|
20
|
+
* a specific chapter page,
|
22
21
|
and manage login/shutdown asynchronously.
|
23
22
|
"""
|
24
23
|
|
@@ -32,6 +31,7 @@ class FetcherProtocol(Protocol):
|
|
32
31
|
) -> bool:
|
33
32
|
"""
|
34
33
|
Attempt to log in asynchronously.
|
34
|
+
|
35
35
|
:returns: True if login succeeded.
|
36
36
|
"""
|
37
37
|
...
|
@@ -45,7 +45,7 @@ class FetcherProtocol(Protocol):
|
|
45
45
|
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
46
46
|
|
47
47
|
:param book_id: The book identifier.
|
48
|
-
:return: The page content as
|
48
|
+
:return: The page content as string list.
|
49
49
|
"""
|
50
50
|
...
|
51
51
|
|
@@ -60,7 +60,7 @@ class FetcherProtocol(Protocol):
|
|
60
60
|
|
61
61
|
:param book_id: The book identifier.
|
62
62
|
:param chapter_id: The chapter identifier.
|
63
|
-
:return: The
|
63
|
+
:return: The page content as string list.
|
64
64
|
"""
|
65
65
|
...
|
66
66
|
|
@@ -126,19 +126,6 @@ class FetcherProtocol(Protocol):
|
|
126
126
|
"""
|
127
127
|
...
|
128
128
|
|
129
|
-
async def set_interactive_mode(self, enable: bool) -> bool:
|
130
|
-
"""
|
131
|
-
Enable or disable interactive mode for manual login.
|
132
|
-
|
133
|
-
:param enable: True to enable, False to disable interactive mode.
|
134
|
-
:return: True if operation or login check succeeded, False otherwise.
|
135
|
-
"""
|
136
|
-
...
|
137
|
-
|
138
|
-
@property
|
139
|
-
def requester_type(self) -> str:
|
140
|
-
...
|
141
|
-
|
142
129
|
@property
|
143
130
|
def is_logged_in(self) -> bool:
|
144
131
|
"""
|
@@ -3,28 +3,27 @@
|
|
3
3
|
novel_downloader.core.interfaces.parser
|
4
4
|
---------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
parsing individual chapter content, and setting parser context via book_id.
|
6
|
+
Protocol defining the interface for parsing book metadata and chapter content.
|
8
7
|
"""
|
9
8
|
|
10
9
|
from typing import Any, Protocol, runtime_checkable
|
11
10
|
|
12
|
-
from novel_downloader.models import ChapterDict
|
11
|
+
from novel_downloader.models import BookInfoDict, ChapterDict
|
13
12
|
|
14
13
|
|
15
14
|
@runtime_checkable
|
16
15
|
class ParserProtocol(Protocol):
|
17
16
|
"""
|
18
17
|
A parser must be able to:
|
19
|
-
|
20
|
-
|
18
|
+
* extract book metadata from an HTML string,
|
19
|
+
* extract a single chapter's text from an HTML string
|
21
20
|
"""
|
22
21
|
|
23
22
|
def parse_book_info(
|
24
23
|
self,
|
25
24
|
html_list: list[str],
|
26
25
|
**kwargs: Any,
|
27
|
-
) ->
|
26
|
+
) -> BookInfoDict | None:
|
28
27
|
"""
|
29
28
|
Parse and return a dictionary of book information from the raw HTML.
|
30
29
|
|
@@ -40,10 +39,10 @@ class ParserProtocol(Protocol):
|
|
40
39
|
**kwargs: Any,
|
41
40
|
) -> ChapterDict | None:
|
42
41
|
"""
|
43
|
-
Parse and
|
42
|
+
Parse chapter page and extract the content of one chapter.
|
44
43
|
|
45
44
|
:param html_list: The HTML list of the chapter pages.
|
46
45
|
:param chapter_id: Identifier of the chapter being parsed.
|
47
|
-
:return: The chapter's
|
46
|
+
:return: The chapter's data.
|
48
47
|
"""
|
49
48
|
...
|
@@ -3,10 +3,13 @@
|
|
3
3
|
novel_downloader.core.interfaces.searcher
|
4
4
|
-----------------------------------------
|
5
5
|
|
6
|
+
Protocol defining the interface for site search implementations.
|
6
7
|
"""
|
7
8
|
|
8
9
|
from typing import Protocol
|
9
10
|
|
11
|
+
import aiohttp
|
12
|
+
|
10
13
|
from novel_downloader.models import SearchResult
|
11
14
|
|
12
15
|
|
@@ -14,5 +17,10 @@ class SearcherProtocol(Protocol):
|
|
14
17
|
site_name: str
|
15
18
|
|
16
19
|
@classmethod
|
17
|
-
def
|
20
|
+
def configure(cls, session: aiohttp.ClientSession) -> None:
|
21
|
+
"""Configure the shared session"""
|
22
|
+
...
|
23
|
+
|
24
|
+
@classmethod
|
25
|
+
async def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
|
18
26
|
...
|
@@ -3,35 +3,72 @@
|
|
3
3
|
novel_downloader.core.parsers
|
4
4
|
-----------------------------
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
Modules:
|
10
|
-
- biquge (笔趣阁)
|
11
|
-
- esjzone (ESJ Zone)
|
12
|
-
- linovelib (哔哩轻小说)
|
13
|
-
- qianbi (铅笔小说)
|
14
|
-
- qidian (起点中文网)
|
15
|
-
- sfacg (SF轻小说)
|
16
|
-
- yamibo (百合会)
|
6
|
+
Parser implementations for extracting book metadata and
|
7
|
+
chapter content from various sources
|
17
8
|
"""
|
18
9
|
|
19
10
|
__all__ = [
|
20
11
|
"get_parser",
|
12
|
+
"AaatxtParser",
|
21
13
|
"BiqugeParser",
|
14
|
+
"BiquyueduParser",
|
15
|
+
"DxmwxParser",
|
16
|
+
"EightnovelParser",
|
22
17
|
"EsjzoneParser",
|
18
|
+
"GuidayeParser",
|
19
|
+
"HetushuParser",
|
20
|
+
"I25zwParser",
|
21
|
+
"Ixdzs8Parser",
|
22
|
+
"Jpxs123Parser",
|
23
|
+
"LewennParser",
|
23
24
|
"LinovelibParser",
|
25
|
+
"PiaotiaParser",
|
26
|
+
"QbtrParser",
|
24
27
|
"QianbiParser",
|
25
28
|
"QidianParser",
|
29
|
+
"Quanben5Parser",
|
26
30
|
"SfacgParser",
|
31
|
+
"ShencouParser",
|
32
|
+
"ShuhaigeParser",
|
33
|
+
"TongrenquanParser",
|
34
|
+
"TtkanParser",
|
35
|
+
"WanbengoParser",
|
36
|
+
"XiaoshuowuParser",
|
37
|
+
"XiguashuwuParser",
|
38
|
+
"Xs63bParser",
|
39
|
+
"XshbookParser",
|
27
40
|
"YamiboParser",
|
41
|
+
"YibigeParser",
|
28
42
|
]
|
29
43
|
|
30
|
-
from .
|
44
|
+
from .aaatxt import AaatxtParser
|
45
|
+
from .b520 import BiqugeParser
|
46
|
+
from .biquyuedu import BiquyueduParser
|
47
|
+
from .dxmwx import DxmwxParser
|
48
|
+
from .eightnovel import EightnovelParser
|
31
49
|
from .esjzone import EsjzoneParser
|
50
|
+
from .guidaye import GuidayeParser
|
51
|
+
from .hetushu import HetushuParser
|
52
|
+
from .i25zw import I25zwParser
|
53
|
+
from .ixdzs8 import Ixdzs8Parser
|
54
|
+
from .jpxs123 import Jpxs123Parser
|
55
|
+
from .lewenn import LewennParser
|
32
56
|
from .linovelib import LinovelibParser
|
57
|
+
from .piaotia import PiaotiaParser
|
58
|
+
from .qbtr import QbtrParser
|
33
59
|
from .qianbi import QianbiParser
|
34
60
|
from .qidian import QidianParser
|
61
|
+
from .quanben5 import Quanben5Parser
|
35
62
|
from .registry import get_parser
|
36
63
|
from .sfacg import SfacgParser
|
64
|
+
from .shencou import ShencouParser
|
65
|
+
from .shuhaige import ShuhaigeParser
|
66
|
+
from .tongrenquan import TongrenquanParser
|
67
|
+
from .ttkan import TtkanParser
|
68
|
+
from .wanbengo import WanbengoParser
|
69
|
+
from .xiaoshuowu import XiaoshuowuParser
|
70
|
+
from .xiguashuwu import XiguashuwuParser
|
71
|
+
from .xs63b import Xs63bParser
|
72
|
+
from .xshbook import XshbookParser
|
37
73
|
from .yamibo import YamiboParser
|
74
|
+
from .yibige import YibigeParser
|
@@ -0,0 +1,132 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.aaatxt
|
4
|
+
------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.parsers.base import BaseParser
|
13
|
+
from novel_downloader.core.parsers.registry import register_parser
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
ChapterInfoDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
@register_parser(
    site_keys=["aaatxt"],
)
class AaatxtParser(BaseParser):
    """
    Parser for 3A电子书 book-info and chapter pages.
    """

    # Literal fragments; any chapter text node containing one of them
    # is dropped from the extracted content.
    ADS: set[str] = {
        "按键盘上方向键",
        "未阅读完",
        "加入书签",
        "已便下次继续阅读",
        "更多原创手机电子书",
        "免费TXT小说下载",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Extract book metadata and the chapter list from the info page.

        :param html_list: Raw HTML pages; only the first entry is read.
        :return: Book info mapping, or None when ``html_list`` is empty.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        def first(expr: str, replaces: list[tuple[str, str]] | None = None) -> str:
            # First XPath hit, stripped (empty string when nothing matches).
            return self._first_str(doc.xpath(expr), replaces=replaces)

        def to_chapter(link: Any) -> ChapterInfoDict:
            # The chapter id is the last URL segment without ".html".
            href = link.get("href", "").strip()
            return {
                "title": link.text_content().strip(),
                "url": href,
                "chapterId": href.split("/")[-1].replace(".html", ""),
            }

        genre = first("//div[@id='submenu']/h2/a[@class='lan']/text()")

        summary_nodes = doc.xpath("//div[@id='jj']//p")
        if summary_nodes:
            summary = summary_nodes[0].text_content().strip()
        else:
            summary = ""

        # Chapters from the book_list
        chapters: list[ChapterInfoDict] = [
            to_chapter(link) for link in doc.xpath("//div[@id='ml']//ol/li/a")
        ]

        # All chapters are placed in a single volume.
        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": first("//div[@class='xiazai']/h1/text()"),
            "author": first("//span[@id='author']/a/text()"),
            "cover_url": first("//div[@id='txtbook']//div[@class='fm']//img/@src"),
            "update_time": first(
                "//div[@id='txtbook']//li[contains(text(), '上传日期')]/text()",
                replaces=[("上传日期:", "")],
            ),
            "tags": [genre] if genre else [],
            "summary": summary,
            "volumes": volumes,
            "extra": {
                "download_url": first(
                    "//div[@id='down']//li[@class='bd']//a/@href"
                ),
            },
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the title and text of one chapter.

        :param html_list: Raw HTML pages; only the first entry is read.
        :param chapter_id: Identifier copied into the returned ``id`` field.
        :return: Chapter mapping, or None when ``html_list`` is empty or
                 no content lines remain after filtering.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        heading = self._first_str(doc.xpath("//div[@id='content']//h1/text()"))
        # Keep only the part after the first "-" (the whole heading if none).
        title = heading.split("-", 1)[-1].strip()

        # Skip empty/instruction/ad lines
        pairs = (
            (raw, raw.strip())
            for raw in doc.xpath("//div[@class='chapter']//text()")
        )
        kept = [line for raw, line in pairs if line and not self._is_ad_line(raw)]

        content = "\n".join(kept)
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "aaatxt"},
        }
|
@@ -0,0 +1,116 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.b520
|
4
|
+
----------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.parsers.base import BaseParser
|
13
|
+
from novel_downloader.core.parsers.registry import register_parser
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
ChapterInfoDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
@register_parser(
    site_keys=["biquge", "bqg", "b520"],
)
class BiqugeParser(BaseParser):
    """
    Parser for 笔趣阁 book-info and chapter pages.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Extract book metadata and the chapter list from the info page.

        :param html_list: Raw HTML pages; only the first entry is read.
        :return: Book info mapping, or None when ``html_list`` is empty.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        intro_nodes = doc.xpath('//div[@id="intro"]')
        if intro_nodes:
            summary = "".join(intro_nodes[0].itertext()).strip()
        else:
            summary = ""

        book_type = self._first_str(doc.xpath('//div[@class="con_top"]/a[2]/text()'))

        # Only links listed after the <dt> that mentions "正文" are collected.
        chapters: list[ChapterInfoDict] = []
        for link in doc.xpath(
            '//div[@id="list"]/dl/dt[contains(., "正文")]/following-sibling::dd/a'
        ):
            href = link.get("href") or ""
            chapters.append(
                {
                    "title": (link.text or "").strip(),
                    "url": href.strip(),
                    # Last path segment, cut at its first dot.
                    "chapterId": href.rsplit("/", 1)[-1].split(".", 1)[0],
                }
            )

        # All chapters are placed in a single volume.
        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": self._first_str(doc.xpath('//div[@id="info"]/h1/text()')),
            "author": self._first_str(
                doc.xpath('//div[@id="info"]/p[1]/text()'),
                replaces=[("\xa0", ""), ("作者:", "")],
            ),
            "cover_url": self._first_str(doc.xpath('//div[@id="fmimg"]/img/@src')),
            "update_time": self._first_str(
                doc.xpath('//div[@id="info"]/p[3]/text()'),
                replaces=[("最后更新:", "")],
            ),
            "tags": [book_type] if book_type else [],
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the title and text of one chapter.

        :param html_list: Raw HTML pages; only the first entry is read.
        :param chapter_id: Identifier copied into the returned ``id`` field
                           and used to build a fallback title.
        :return: Chapter mapping, or None when ``html_list`` is empty, the
                 content container is missing, or the content is blank.
        """
        if not html_list:
            return None
        doc = html.fromstring(html_list[0])

        # Fall back to a generated title when the page has no heading.
        title = (
            self._first_str(doc.xpath('//div[@class="bookname"]/h1/text()'))
            or f"第 {chapter_id} 章"
        )

        containers = doc.xpath('//div[@id="content"]')
        if not containers:
            return None

        paragraphs = []
        for node in containers[0].xpath(".//p"):
            paragraphs.append("".join(node.itertext()).strip())

        # Drop a trailing paragraph that carries the site URL.
        if paragraphs and "www.shuhaige.net" in paragraphs[-1]:
            del paragraphs[-1]

        content = "\n".join(paragraphs)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "biquge"},
        }
|
@@ -3,22 +3,17 @@
|
|
3
3
|
novel_downloader.core.parsers.base
|
4
4
|
----------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
ParserProtocol interface and provides a structured foundation for
|
8
|
-
site-specific parsers.
|
9
|
-
|
10
|
-
BaseParser manages internal parser state and enforces
|
11
|
-
a standard parsing interface for:
|
12
|
-
- Book info pages (e.g. metadata, chapter list)
|
13
|
-
- Chapter pages (e.g. textual content)
|
6
|
+
Abstract base class providing common behavior for site-specific parsers.
|
14
7
|
"""
|
15
8
|
|
16
9
|
import abc
|
10
|
+
import re
|
11
|
+
from collections.abc import Iterable
|
17
12
|
from pathlib import Path
|
18
13
|
from typing import Any
|
19
14
|
|
20
15
|
from novel_downloader.core.interfaces import ParserProtocol
|
21
|
-
from novel_downloader.models import ChapterDict, ParserConfig
|
16
|
+
from novel_downloader.models import BookInfoDict, ChapterDict, ParserConfig
|
22
17
|
|
23
18
|
|
24
19
|
class BaseParser(ParserProtocol, abc.ABC):
|
@@ -32,6 +27,10 @@ class BaseParser(ParserProtocol, abc.ABC):
|
|
32
27
|
Subclasses must implement actual parsing logic for specific sites.
|
33
28
|
"""
|
34
29
|
|
30
|
+
ADS: set[str] = set()
|
31
|
+
|
32
|
+
_SPACE_RE = re.compile(r"\s+")
|
33
|
+
|
35
34
|
def __init__(
|
36
35
|
self,
|
37
36
|
config: ParserConfig,
|
@@ -44,15 +43,20 @@ class BaseParser(ParserProtocol, abc.ABC):
|
|
44
43
|
self._config = config
|
45
44
|
self._book_id: str | None = None
|
46
45
|
|
46
|
+
self._save_font_debug = config.save_font_debug
|
47
|
+
self._decode_font: bool = config.decode_font
|
48
|
+
self._use_truncation = config.use_truncation
|
47
49
|
self._base_cache_dir = Path(config.cache_dir)
|
48
50
|
self._cache_dir = self._base_cache_dir
|
49
51
|
|
52
|
+
self._ad_pattern = self._compile_ads_pattern()
|
53
|
+
|
50
54
|
@abc.abstractmethod
|
51
55
|
def parse_book_info(
|
52
56
|
self,
|
53
57
|
html_list: list[str],
|
54
58
|
**kwargs: Any,
|
55
|
-
) ->
|
59
|
+
) -> BookInfoDict | None:
|
56
60
|
"""
|
57
61
|
Parse and return a dictionary of book information from the raw HTML.
|
58
62
|
|
@@ -69,11 +73,11 @@ class BaseParser(ParserProtocol, abc.ABC):
|
|
69
73
|
**kwargs: Any,
|
70
74
|
) -> ChapterDict | None:
|
71
75
|
"""
|
72
|
-
Parse and
|
76
|
+
Parse chapter page and extract the content of one chapter.
|
73
77
|
|
74
78
|
:param html_list: The HTML list of the chapter pages.
|
75
79
|
:param chapter_id: Identifier of the chapter being parsed.
|
76
|
-
:return: The chapter's
|
80
|
+
:return: The chapter's data.
|
77
81
|
"""
|
78
82
|
...
|
79
83
|
|
@@ -104,3 +108,51 @@ class BaseParser(ParserProtocol, abc.ABC):
|
|
104
108
|
book-related folders or states.
|
105
109
|
"""
|
106
110
|
pass
|
111
|
+
|
112
|
+
def _compile_ads_pattern(self) -> re.Pattern[str] | None:
|
113
|
+
"""
|
114
|
+
Compile a regex pattern from the ADS list, or return None if no ADS.
|
115
|
+
"""
|
116
|
+
if not self.ADS:
|
117
|
+
return None
|
118
|
+
|
119
|
+
return re.compile("|".join(map(re.escape, self.ADS)))
|
120
|
+
|
121
|
+
def _is_ad_line(self, line: str) -> bool:
|
122
|
+
"""
|
123
|
+
Check if a line contains any ad text.
|
124
|
+
|
125
|
+
:param line: Single text line.
|
126
|
+
:return: True if line matches ad pattern, else False.
|
127
|
+
"""
|
128
|
+
return bool(self._ad_pattern and self._ad_pattern.search(line))
|
129
|
+
|
130
|
+
def _filter_ads(self, lines: Iterable[str]) -> list[str]:
|
131
|
+
"""
|
132
|
+
Filter out lines containing any ad text defined in ADS.
|
133
|
+
|
134
|
+
:param lines: Iterable of text lines (e.g. chapter content).
|
135
|
+
:return: List of lines with ads removed.
|
136
|
+
"""
|
137
|
+
if not self._ad_pattern:
|
138
|
+
return list(lines)
|
139
|
+
return [line for line in lines if not self._ad_pattern.search(line)]
|
140
|
+
|
141
|
+
@classmethod
|
142
|
+
def _norm_space(cls, s: str, c: str = " ") -> str:
|
143
|
+
"""
|
144
|
+
collapse any run of whitespace (incl. newlines, full-width spaces)
|
145
|
+
|
146
|
+
:param s: Input string to normalize.
|
147
|
+
:param c: Replacement character to use for collapsed whitespace.
|
148
|
+
"""
|
149
|
+
return cls._SPACE_RE.sub(c, s).strip()
|
150
|
+
|
151
|
+
@staticmethod
|
152
|
+
def _first_str(xs: list[str], replaces: list[tuple[str, str]] | None = None) -> str:
|
153
|
+
replaces = replaces or []
|
154
|
+
value: str = xs[0].strip() if xs else ""
|
155
|
+
for replace in replaces:
|
156
|
+
old, new = replace
|
157
|
+
value = value.replace(old, new)
|
158
|
+
return value
|