novel-downloader 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +14 -0
- novel_downloader/cli/__init__.py +14 -0
- novel_downloader/cli/clean.py +134 -0
- novel_downloader/cli/download.py +132 -0
- novel_downloader/cli/interactive.py +67 -0
- novel_downloader/cli/main.py +45 -0
- novel_downloader/cli/settings.py +177 -0
- novel_downloader/config/__init__.py +52 -0
- novel_downloader/config/adapter.py +153 -0
- novel_downloader/config/loader.py +177 -0
- novel_downloader/config/models.py +173 -0
- novel_downloader/config/site_rules.py +97 -0
- novel_downloader/core/__init__.py +25 -0
- novel_downloader/core/downloaders/__init__.py +22 -0
- novel_downloader/core/downloaders/base_async_downloader.py +157 -0
- novel_downloader/core/downloaders/base_downloader.py +187 -0
- novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
- novel_downloader/core/downloaders/common_downloader.py +191 -0
- novel_downloader/core/downloaders/qidian_downloader.py +208 -0
- novel_downloader/core/factory/__init__.py +33 -0
- novel_downloader/core/factory/downloader_factory.py +149 -0
- novel_downloader/core/factory/parser_factory.py +62 -0
- novel_downloader/core/factory/requester_factory.py +106 -0
- novel_downloader/core/factory/saver_factory.py +49 -0
- novel_downloader/core/interfaces/__init__.py +32 -0
- novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
- novel_downloader/core/interfaces/downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/parser_protocol.py +40 -0
- novel_downloader/core/interfaces/requester_protocol.py +65 -0
- novel_downloader/core/interfaces/saver_protocol.py +61 -0
- novel_downloader/core/parsers/__init__.py +28 -0
- novel_downloader/core/parsers/base_parser.py +96 -0
- novel_downloader/core/parsers/common_parser/__init__.py +14 -0
- novel_downloader/core/parsers/common_parser/helper.py +321 -0
- novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
- novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
- novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
- novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
- novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
- novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
- novel_downloader/core/requesters/__init__.py +31 -0
- novel_downloader/core/requesters/base_async_session.py +297 -0
- novel_downloader/core/requesters/base_browser.py +210 -0
- novel_downloader/core/requesters/base_session.py +243 -0
- novel_downloader/core/requesters/common_requester/__init__.py +18 -0
- novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
- novel_downloader/core/requesters/common_requester/common_session.py +126 -0
- novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
- novel_downloader/core/savers/__init__.py +20 -0
- novel_downloader/core/savers/base_saver.py +169 -0
- novel_downloader/core/savers/common_saver/__init__.py +13 -0
- novel_downloader/core/savers/common_saver/common_epub.py +232 -0
- novel_downloader/core/savers/common_saver/common_txt.py +176 -0
- novel_downloader/core/savers/common_saver/main_saver.py +86 -0
- novel_downloader/core/savers/epub_utils/__init__.py +27 -0
- novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
- novel_downloader/core/savers/epub_utils/initializer.py +98 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
- novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
- novel_downloader/core/savers/qidian_saver.py +22 -0
- novel_downloader/locales/en.json +91 -0
- novel_downloader/locales/zh.json +91 -0
- novel_downloader/resources/config/rules.toml +196 -0
- novel_downloader/resources/config/settings.yaml +73 -0
- novel_downloader/resources/css_styles/main.css +104 -0
- novel_downloader/resources/css_styles/volume-intro.css +56 -0
- novel_downloader/resources/images/volume_border.png +0 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
- novel_downloader/resources/json/replace_word_map.json +4 -0
- novel_downloader/resources/text/blacklist.txt +22 -0
- novel_downloader/utils/__init__.py +0 -0
- novel_downloader/utils/cache.py +24 -0
- novel_downloader/utils/constants.py +158 -0
- novel_downloader/utils/crypto_utils.py +144 -0
- novel_downloader/utils/file_utils/__init__.py +43 -0
- novel_downloader/utils/file_utils/io.py +252 -0
- novel_downloader/utils/file_utils/normalize.py +68 -0
- novel_downloader/utils/file_utils/sanitize.py +77 -0
- novel_downloader/utils/fontocr/__init__.py +23 -0
- novel_downloader/utils/fontocr/ocr_v1.py +304 -0
- novel_downloader/utils/fontocr/ocr_v2.py +658 -0
- novel_downloader/utils/hash_store.py +288 -0
- novel_downloader/utils/hash_utils.py +103 -0
- novel_downloader/utils/i18n.py +41 -0
- novel_downloader/utils/logger.py +104 -0
- novel_downloader/utils/model_loader.py +72 -0
- novel_downloader/utils/network.py +287 -0
- novel_downloader/utils/state.py +156 -0
- novel_downloader/utils/text_utils/__init__.py +27 -0
- novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
- novel_downloader/utils/text_utils/diff_display.py +75 -0
- novel_downloader/utils/text_utils/font_mapping.py +31 -0
- novel_downloader/utils/text_utils/text_cleaning.py +57 -0
- novel_downloader/utils/time_utils/__init__.py +22 -0
- novel_downloader/utils/time_utils/datetime_utils.py +146 -0
- novel_downloader/utils/time_utils/sleep_utils.py +49 -0
- novel_downloader-1.1.0.dist-info/METADATA +157 -0
- novel_downloader-1.1.0.dist-info/RECORD +115 -0
- novel_downloader-1.1.0.dist-info/WHEEL +5 -0
- novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
- novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
- novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,106 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.factory.requester_factory
|
5
|
+
-----------------------------------------------
|
6
|
+
|
7
|
+
This module implements a factory function for retrieving requester instances
|
8
|
+
based on the target novel platform (site).
|
9
|
+
|
10
|
+
- get_async_requester -> returns AsyncRequesterProtocol
|
11
|
+
- get_sync_requester -> returns RequesterProtocol
|
12
|
+
- get_requester -> dispatches to one of the above based on config.mode
|
13
|
+
|
14
|
+
To add support for new sites or modes, extend the `_site_map` accordingly.
|
15
|
+
"""
|
16
|
+
|
17
|
+
from typing import Callable, Union
|
18
|
+
|
19
|
+
from novel_downloader.config import RequesterConfig, load_site_rules
|
20
|
+
from novel_downloader.core.interfaces import AsyncRequesterProtocol, RequesterProtocol
|
21
|
+
from novel_downloader.core.requesters import (
|
22
|
+
CommonAsyncSession,
|
23
|
+
CommonSession,
|
24
|
+
QidianBrowser,
|
25
|
+
QidianSession,
|
26
|
+
)
|
27
|
+
|
28
|
+
# Site-specific synchronous requester classes, keyed first by site name and
# then by requester mode. get_sync_requester consults this table before it
# falls back to CommonSession.
_site_map: dict[str, dict[str, Callable[[RequesterConfig], RequesterProtocol]]] = {
    "qidian": dict(session=QidianSession, browser=QidianBrowser),
}
|
37
|
+
|
38
|
+
|
39
|
+
def get_async_requester(
    site: str,
    config: RequesterConfig,
) -> AsyncRequesterProtocol:
    """
    Build the asynchronous requester for a site.

    The site name is lower-cased and looked up in the mapping returned by
    ``load_site_rules()``. The ``"profile"`` entry of the matching rule is
    passed to ``CommonAsyncSession`` along with the config and the
    lower-cased site key.

    :param site: Site name (e.g., 'qidian'); lower-cased before lookup.
    :param config: Configuration for the requester.
    :return: A ``CommonAsyncSession`` instance for the site.
    :raises ValueError: If the site has no entry in the site rules.
    """
    site_key = site.lower()
    rule = load_site_rules().get(site_key)
    if rule is None:
        raise ValueError(f"Unsupported site: {site}")
    return CommonAsyncSession(config, site_key, rule["profile"])
|
57
|
+
|
58
|
+
|
59
|
+
def get_sync_requester(
    site: str,
    config: RequesterConfig,
) -> RequesterProtocol:
    """
    Build the synchronous requester for a site.

    A site-specific class registered in ``_site_map`` for the requested mode
    takes precedence. Otherwise the site must have an entry in the site
    rules, and a ``CommonSession`` is created from that rule's ``"profile"``.

    :param site: Site name (e.g., 'qidian'); lower-cased before lookup.
    :param config: Configuration for the requester; ``config.mode`` selects
                   the site-specific implementation, case-insensitively.
    :return: An instance of a requester class.
    :raises ValueError: If there is neither a site-specific class for the
                        mode nor a site rule for the site.
    """
    site_key = site.lower()
    # Lower-case the mode exactly as get_requester does, so that a value such
    # as "Browser" selects the registered class instead of silently falling
    # through to the generic session.
    mode = config.mode.lower()

    # Site-specific implementation for this mode, if one is registered.
    requester_cls = _site_map.get(site_key, {}).get(mode)
    if requester_cls is not None:
        return requester_cls(config)

    # Fallback: generic session driven by the site's rule profile.
    rule = load_site_rules().get(site_key)
    if rule is None:
        raise ValueError(f"Unsupported site: {site}")
    return CommonSession(config, site_key, rule["profile"])
|
86
|
+
|
87
|
+
|
88
|
+
def get_requester(
    site: str,
    config: RequesterConfig,
) -> Union[AsyncRequesterProtocol, RequesterProtocol]:
    """
    Dispatch to get_async_requester or get_sync_requester based on
    config.mode.

    The mode is compared case-insensitively. 'async' selects the
    asynchronous factory; 'browser' and 'session' both select the
    synchronous factory; anything else is an error.

    :param site: Site name (e.g., 'qidian')
    :param config: Configuration for the requester
    :return: An instance of a requester class
    :raises ValueError: If config.mode is not 'async', 'browser' or
                        'session', or if the selected factory rejects
                        the site.
    """
    mode = config.mode.lower()
    if mode == "async":
        return get_async_requester(site, config)
    if mode in ("browser", "session"):
        return get_sync_requester(site, config)
    # Report the mode as the caller supplied it, not the lower-cased form.
    raise ValueError(f"Unknown mode '{config.mode}' for site '{site}'")
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.factory.saver_factory
|
5
|
+
-------------------------------------------
|
6
|
+
|
7
|
+
This module implements a factory function for creating saver instances
|
8
|
+
based on the site name, passing the saver configuration through to them.
|
9
|
+
|
10
|
+
Currently supported:
|
11
|
+
- Site: 'qidian'
|
12
|
+
- QidianSaver
|
13
|
+
|
14
|
+
To add support for new sites or modes, extend the `_site_map` accordingly.
|
15
|
+
"""
|
16
|
+
|
17
|
+
from novel_downloader.config import SaverConfig, load_site_rules
|
18
|
+
from novel_downloader.core.interfaces import SaverProtocol
|
19
|
+
from novel_downloader.core.savers import (
|
20
|
+
CommonSaver,
|
21
|
+
QidianSaver,
|
22
|
+
)
|
23
|
+
|
24
|
+
# Savers that have a dedicated implementation, keyed by lower-case site name.
# get_saver falls back to CommonSaver for other sites found in the site rules.
_site_map = dict(
    qidian=QidianSaver,
    # biquge=...,
)
|
28
|
+
|
29
|
+
|
30
|
+
def get_saver(site: str, config: SaverConfig) -> SaverProtocol:
    """
    Build the saver for a site.

    A class registered in ``_site_map`` is used when one exists for the
    lower-cased site name. Otherwise a ``CommonSaver`` is returned, provided
    the site appears in the mapping returned by ``load_site_rules()``.

    :param site: Site name (e.g., 'qidian'); lower-cased before lookup.
    :param config: Configuration for the saver.
    :return: An instance of a saver class.
    :raises ValueError: If the site has neither a dedicated saver nor a
                        site rule.
    """
    site_key = site.lower()

    dedicated = _site_map.get(site_key)
    if dedicated:
        return dedicated(config)

    # No dedicated saver: the generic one applies only to sites with rules.
    if site_key in load_site_rules():
        return CommonSaver(config, site_key)

    raise ValueError(f"Unsupported site: {site}")
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.interfaces
|
5
|
+
--------------------------------
|
6
|
+
|
7
|
+
This package centralizes the protocol definitions used across the
|
8
|
+
system to promote interface-based design and type-safe dependency
|
9
|
+
injection.
|
10
|
+
|
11
|
+
Included protocols:
|
12
|
+
- AsyncDownloaderProtocol
- AsyncRequesterProtocol
- DownloaderProtocol
|
13
|
+
- ParserProtocol
|
14
|
+
- RequesterProtocol
|
15
|
+
- SaverProtocol
|
16
|
+
"""
|
17
|
+
|
18
|
+
from .async_downloader_protocol import AsyncDownloaderProtocol
|
19
|
+
from .async_requester_protocol import AsyncRequesterProtocol
|
20
|
+
from .downloader_protocol import DownloaderProtocol
|
21
|
+
from .parser_protocol import ParserProtocol
|
22
|
+
from .requester_protocol import RequesterProtocol
|
23
|
+
from .saver_protocol import SaverProtocol
|
24
|
+
|
25
|
+
__all__ = [
|
26
|
+
"AsyncDownloaderProtocol",
|
27
|
+
"AsyncRequesterProtocol",
|
28
|
+
"DownloaderProtocol",
|
29
|
+
"ParserProtocol",
|
30
|
+
"RequesterProtocol",
|
31
|
+
"SaverProtocol",
|
32
|
+
]
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.interfaces.async_downloader_protocol
|
5
|
+
----------------------------------------------------------
|
6
|
+
|
7
|
+
This module defines the AsyncDownloaderProtocol, a structural interface
|
8
|
+
that outlines the expected behavior of any asynchronous downloader class.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from typing import List, Protocol
|
12
|
+
|
13
|
+
|
14
|
+
class AsyncDownloaderProtocol(Protocol):
    """
    Structural interface for coroutine-based downloaders.

    An implementation exposes two awaitable entry points: one that takes
    a list of book IDs and one that takes a single book ID.
    """

    async def download(self, book_ids: List[str]) -> None:
        """
        Download several books in one call.

        :param book_ids: Identifiers of the books to download.
        """
        ...

    async def download_one(self, book_id: str) -> None:
        """
        Download one book.

        :param book_id: Identifier of the book to download.
        """
        ...
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.interfaces.async_requester_protocol
|
5
|
+
--------------------------------------------------------
|
6
|
+
|
7
|
+
Defines the AsyncRequesterProtocol interface for fetching raw HTML or JSON
|
8
|
+
for book info pages, individual chapters, managing request lifecycle,
|
9
|
+
and optionally retrieving a user's authenticated bookcase — all in async style.
|
10
|
+
"""
|
11
|
+
|
12
|
+
from typing import Optional, Protocol, runtime_checkable
|
13
|
+
|
14
|
+
|
15
|
+
@runtime_checkable
class AsyncRequesterProtocol(Protocol):
    """
    Structural interface for asynchronous requesters.

    Implementations provide coroutines that log in, fetch the raw content
    of a book info page, a chapter page and the user's bookcase page, and
    shut the requester down.
    """

    async def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Try to log in.

        :param max_retries: Retry limit for the login attempt
                            (exact semantics are up to the implementation).
        :param manual_login: Flag requesting a manual login flow
                             (exact semantics are up to the implementation).
        :return: True if login succeeded.
        """
        ...

    async def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Fetch the raw HTML (or JSON) of a book's info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    async def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of one chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    async def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Fetch the HTML of the authenticated user's bookcase page.

        Described as optional by the interface, although it is declared
        here like every other member.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML markup of the bookcase page.
        """
        ...

    async def shutdown(self) -> None:
        """
        Release any resources the requester holds (e.g., close an aiohttp session).
        """
        ...
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.interfaces.downloader_protocol
|
5
|
+
----------------------------------------------------
|
6
|
+
|
7
|
+
This module defines the DownloaderProtocol, a structural interface
|
8
|
+
that outlines the expected behavior of any downloader class.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from typing import List, Protocol
|
12
|
+
|
13
|
+
|
14
|
+
class DownloaderProtocol(Protocol):
    """
    Structural interface for synchronous downloaders.

    An implementation exposes two entry points: one that takes a list of
    book IDs and one that takes a single book ID.
    """

    def download(self, book_ids: List[str]) -> None:
        """
        Download several books in one call.

        :param book_ids: Identifiers of the books to download.
        """
        ...

    def download_one(self, book_id: str) -> None:
        """
        Download one book.

        :param book_id: Identifier of the book to download.
        """
        ...
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.interfaces.parser_protocol
|
5
|
+
------------------------------------------------
|
6
|
+
|
7
|
+
Defines the ParserProtocol interface for extracting book metadata,
|
8
|
+
parsing individual chapter content, and setting parser context via book_id.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from typing import Any, Dict, Protocol, runtime_checkable
|
12
|
+
|
13
|
+
|
14
|
+
@runtime_checkable
class ParserProtocol(Protocol):
    """
    A parser must be able to:
    - extract book metadata from an HTML string,
    - extract a single chapter's data from an HTML string.
    """

    def parse_book_info(self, html_str: str) -> Dict[str, Any]:
        """
        Parse and return a dictionary of book information from the raw HTML.

        :param html_str: The HTML of a book's info page.
        :return: A dict containing metadata like title, author, chapters list, etc.
        """
        ...

    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
        """
        Parse one chapter page and return its extracted data.

        :param html_str: The HTML of the chapter page.
        :param chapter_id: Identifier of the chapter being parsed.
        :return: A dict holding the parsed chapter data; the keys are
                 defined by the implementation.
        """
        ...
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.interfaces.requester_protocol
|
5
|
+
--------------------------------------------------
|
6
|
+
|
7
|
+
Defines the RequesterProtocol interface for fetching raw HTML or JSON
|
8
|
+
for book info pages, individual chapters, managing request lifecycle,
|
9
|
+
and optionally retrieving a user's authenticated bookcase.
|
10
|
+
"""
|
11
|
+
|
12
|
+
from typing import Optional, Protocol, runtime_checkable
|
13
|
+
|
14
|
+
|
15
|
+
@runtime_checkable
class RequesterProtocol(Protocol):
    """
    Structural interface for synchronous requesters.

    Implementations log in, fetch the raw content of a book info page,
    a chapter page and the user's bookcase page, and shut the requester
    down.
    """

    def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
        """
        Try to log in.

        :param max_retries: Retry limit for the login attempt
                            (exact semantics are up to the implementation).
        :param manual_login: Flag requesting a manual login flow
                             (exact semantics are up to the implementation).
        :return: A bool; presumably True on success, mirroring the
                 async protocol. Confirm against implementations.
        """
        ...

    def get_book_info(self, book_id: str, wait_time: Optional[int] = None) -> str:
        """
        Fetch the raw HTML (or JSON) of a book's info page.

        :param book_id: The book identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The page content as a string.
        """
        ...

    def get_book_chapter(
        self, book_id: str, chapter_id: str, wait_time: Optional[int] = None
    ) -> str:
        """
        Fetch the raw HTML (or JSON) of one chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param wait_time: Base number of seconds to wait before returning content.
        :return: The chapter content as a string.
        """
        ...

    def shutdown(self) -> None:
        """
        Shut the requester down and clean up its resources.
        """
        ...

    def get_bookcase(self, wait_time: Optional[int] = None) -> str:
        """
        Fetch the HTML of the authenticated user's bookcase page.

        Described as optional by the interface, although it is declared
        here like every other member.

        :param wait_time: Base number of seconds to wait before returning content.
        :return: The HTML markup of the bookcase page.
        """
        ...
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.interfaces.saver_protocol
|
5
|
+
------------------------------------------------
|
6
|
+
|
7
|
+
Defines the SaverProtocol interface for persisting completed books in
|
8
|
+
TXT, EPUB, Markdown, and PDF formats.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from typing import Protocol, runtime_checkable
|
12
|
+
|
13
|
+
|
14
|
+
@runtime_checkable
class SaverProtocol(Protocol):
    """
    Structural interface for savers that persist a completed book.

    Besides the general ``save`` entry point, the protocol declares one
    method per output format: TXT, EPUB, Markdown and PDF. The interface
    notes describe the non-TXT formats as optional, but they are declared
    here as ordinary protocol members.
    """

    def save(self, book_id: str) -> None:
        """
        Save the book in the formats specified in config.

        Per the interface contract, a format whose method is missing or
        fails should be logged and skipped rather than abort the save.

        :param book_id: The book identifier (used for filename, lookup, etc.)
        """
        ...

    def save_as_txt(self, book_id: str) -> None:
        """
        Write the assembled book to a .txt file.

        :param book_id: The book identifier (used for filename or lookup).
        """
        ...

    def save_as_epub(self, book_id: str) -> None:
        """
        Optional: write the assembled book to an .epub file.

        :param book_id: The book identifier.
        """
        ...

    def save_as_md(self, book_id: str) -> None:
        """
        Optional: write the assembled book to a Markdown (.md) file.

        :param book_id: The book identifier.
        """
        ...

    def save_as_pdf(self, book_id: str) -> None:
        """
        Optional: write the assembled book to a PDF file.

        :param book_id: The book identifier.
        """
        ...
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.parsers
|
5
|
+
-----------------------------
|
6
|
+
|
7
|
+
This package defines all site-specific parsing modules
|
8
|
+
for the novel_downloader framework.
|
9
|
+
|
10
|
+
Currently supported:
|
11
|
+
- Qidian (起点中文网) via browser-based and session-based page parsing.
|
12
|
+
|
13
|
+
Modules:
|
14
|
+
- qidian_parser
|
15
|
+
- common_parser
|
16
|
+
"""
|
17
|
+
|
18
|
+
from .common_parser import CommonParser
|
19
|
+
from .qidian_parser import (
|
20
|
+
QidianBrowserParser,
|
21
|
+
QidianSessionParser,
|
22
|
+
)
|
23
|
+
|
24
|
+
__all__ = [
|
25
|
+
"CommonParser",
|
26
|
+
"QidianBrowserParser",
|
27
|
+
"QidianSessionParser",
|
28
|
+
]
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.parsers.base_parser
|
5
|
+
-----------------------------------------
|
6
|
+
|
7
|
+
This module defines the BaseParser abstract class, which implements the
|
8
|
+
ParserProtocol interface and provides a structured foundation for
|
9
|
+
site-specific parsers.
|
10
|
+
|
11
|
+
BaseParser manages internal parser state and enforces
|
12
|
+
a standard parsing interface for:
|
13
|
+
- Book info pages (e.g. metadata, chapter list)
|
14
|
+
- Chapter pages (e.g. textual content)
|
15
|
+
"""
|
16
|
+
|
17
|
+
import abc
|
18
|
+
from pathlib import Path
|
19
|
+
from typing import Any, Dict, Optional
|
20
|
+
|
21
|
+
from novel_downloader.config import ParserConfig
|
22
|
+
from novel_downloader.core.interfaces import ParserProtocol
|
23
|
+
|
24
|
+
|
25
|
+
class BaseParser(ParserProtocol, abc.ABC):
    """
    Abstract base for parsers that extract book metadata and chapter
    content from raw HTML.

    The base class stores the parser configuration, the cache directory
    taken from it, and the ID of the book currently being processed.

    Subclasses must implement the actual parsing logic for specific sites.
    """

    def __init__(self, config: ParserConfig):
        """
        Initialize the parser with a configuration object.

        :param config: ParserConfig object controlling parsing behavior;
                       its ``cache_dir`` becomes the base cache directory.
        """
        self._config = config
        # No book is in context until the ``book_id`` setter is used.
        self._book_id: Optional[str] = None

        self._base_cache_dir = Path(config.cache_dir)

    @abc.abstractmethod
    def parse_book_info(self, html: str) -> Dict[str, Any]:
        """
        Parse a book info page and extract metadata and chapter structure.

        Depending on the site structure, the return dict may include a
        flat `chapters` list or nested `volumes` with chapter groups.

        :param html: Raw HTML of the book info page.
        :return: Parsed metadata and chapter structure as a dictionary.
        """
        ...

    @abc.abstractmethod
    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
        """
        Parse a single chapter page and extract its content.

        :param html_str: Raw HTML of the chapter page.
        :param chapter_id: Identifier of the chapter being parsed.
        :return: A dictionary holding the parsed chapter data; the keys
                 are defined by the concrete parser.
        """
        ...

    @property
    def book_id(self) -> Optional[str]:
        """
        Current book ID in context.

        :return: The current book identifier, or None if none has been set.
        """
        return self._book_id

    @book_id.setter
    def book_id(self, value: str) -> None:
        """
        Set the current book ID, then run the ``_on_book_id_set`` hook.

        :param value: Book identifier.
        """
        self._book_id = value
        self._on_book_id_set()

    def _on_book_id_set(self) -> None:
        """
        Hook called when a new book ID is set.

        The base implementation does nothing. Subclasses can override it
        to initialize book-related folders or states.
        """
        pass
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.parsers.common_parser
|
5
|
+
-------------------------------------------
|
6
|
+
|
7
|
+
This module provides a CommonParser class that implements
|
8
|
+
general-purpose parsing logic for extracting novel metadata
|
9
|
+
and chapter content based on site-specific rules.
|
10
|
+
"""
|
11
|
+
|
12
|
+
from .main_parser import CommonParser
|
13
|
+
|
14
|
+
__all__ = ["CommonParser"]
|