novel-downloader 1.3.3__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -39
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
- novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
- novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +193 -0
- novel_downloader/core/fetchers/linovelib/session.py +193 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +11 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +2 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/METADATA +69 -35
- novel_downloader-1.4.1.dist-info/RECORD +170 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -210
- novel_downloader/core/downloaders/common/common_sync.py +0 -202
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.3.dist-info/RECORD +0 -166
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/top_level.txt +0 -0
@@ -7,10 +7,10 @@ novel_downloader.core.parsers.sfacg.main_parser
|
|
7
7
|
|
8
8
|
from typing import Any
|
9
9
|
|
10
|
-
from lxml import
|
10
|
+
from lxml import html
|
11
11
|
|
12
12
|
from novel_downloader.core.parsers.base import BaseParser
|
13
|
-
from novel_downloader.
|
13
|
+
from novel_downloader.models import ChapterDict
|
14
14
|
|
15
15
|
|
16
16
|
class SfacgParser(BaseParser):
|
@@ -40,20 +40,20 @@ class SfacgParser(BaseParser):
|
|
40
40
|
|
41
41
|
def parse_book_info(
|
42
42
|
self,
|
43
|
-
|
43
|
+
html_list: list[str],
|
44
44
|
**kwargs: Any,
|
45
45
|
) -> dict[str, Any]:
|
46
46
|
"""
|
47
47
|
Parse a book info page and extract metadata and chapter structure.
|
48
48
|
|
49
|
-
:param
|
49
|
+
:param html_list: Raw HTML pages; [0] is the book info page and [1] is the catalog page.
|
50
50
|
:return: Parsed metadata and chapter structure as a dictionary.
|
51
51
|
"""
|
52
|
-
if len(
|
52
|
+
if len(html_list) < 2:
|
53
53
|
return {}
|
54
54
|
|
55
|
-
info_tree =
|
56
|
-
catalog_tree =
|
55
|
+
info_tree = html.fromstring(html_list[0])
|
56
|
+
catalog_tree = html.fromstring(html_list[1])
|
57
57
|
|
58
58
|
result: dict[str, Any] = {}
|
59
59
|
|
@@ -113,25 +113,25 @@ class SfacgParser(BaseParser):
|
|
113
113
|
|
114
114
|
def parse_chapter(
|
115
115
|
self,
|
116
|
-
|
116
|
+
html_list: list[str],
|
117
117
|
chapter_id: str,
|
118
118
|
**kwargs: Any,
|
119
119
|
) -> ChapterDict | None:
|
120
120
|
"""
|
121
121
|
Parse a single chapter page and extract clean text or simplified HTML.
|
122
122
|
|
123
|
-
:param
|
123
|
+
:param html_list: List of raw HTML strings; the first entry is the chapter page.
|
124
124
|
:param chapter_id: Identifier of the chapter being parsed.
|
125
125
|
:return: Cleaned chapter content as plain text or minimal HTML.
|
126
126
|
"""
|
127
|
-
if not
|
127
|
+
if not html_list:
|
128
128
|
return None
|
129
129
|
keywords = [
|
130
130
|
"本章为VIP章节", # 本章为VIP章节,订阅后可立即阅读
|
131
131
|
]
|
132
|
-
if any(kw in
|
132
|
+
if any(kw in html_list[0] for kw in keywords):
|
133
133
|
return None
|
134
|
-
tree =
|
134
|
+
tree = html.fromstring(html_list[0])
|
135
135
|
|
136
136
|
content_lines: list[str] = []
|
137
137
|
content_nodes = tree.xpath(self._CHAPTER_CONTENT_NODES_XPATH)
|
@@ -7,10 +7,10 @@ novel_downloader.core.parsers.yamibo.main_parser
|
|
7
7
|
|
8
8
|
from typing import Any
|
9
9
|
|
10
|
-
from lxml import
|
10
|
+
from lxml import html
|
11
11
|
|
12
12
|
from novel_downloader.core.parsers.base import BaseParser
|
13
|
-
from novel_downloader.
|
13
|
+
from novel_downloader.models import ChapterDict
|
14
14
|
|
15
15
|
|
16
16
|
class YamiboParser(BaseParser):
|
@@ -49,19 +49,19 @@ class YamiboParser(BaseParser):
|
|
49
49
|
|
50
50
|
def parse_book_info(
|
51
51
|
self,
|
52
|
-
|
52
|
+
html_list: list[str],
|
53
53
|
**kwargs: Any,
|
54
54
|
) -> dict[str, Any]:
|
55
55
|
"""
|
56
56
|
Parse a book info page and extract metadata and chapter structure.
|
57
57
|
|
58
|
-
:param
|
58
|
+
:param html_list: List of raw HTML strings; the first entry is the book info page.
|
59
59
|
:return: Parsed metadata and chapter structure as a dictionary.
|
60
60
|
"""
|
61
|
-
if not
|
61
|
+
if not html_list:
|
62
62
|
return {}
|
63
63
|
|
64
|
-
tree =
|
64
|
+
tree = html.fromstring(html_list[0])
|
65
65
|
result: dict[str, Any] = {}
|
66
66
|
|
67
67
|
result["book_name"] = tree.xpath(self._BOOK_NAME_XPATH).strip()
|
@@ -151,20 +151,20 @@ class YamiboParser(BaseParser):
|
|
151
151
|
|
152
152
|
def parse_chapter(
|
153
153
|
self,
|
154
|
-
|
154
|
+
html_list: list[str],
|
155
155
|
chapter_id: str,
|
156
156
|
**kwargs: Any,
|
157
157
|
) -> ChapterDict | None:
|
158
158
|
"""
|
159
159
|
Parse a single chapter page and extract clean text or simplified HTML.
|
160
160
|
|
161
|
-
:param
|
161
|
+
:param html_list: List of raw HTML strings; the first entry is the chapter page.
|
162
162
|
:param chapter_id: Identifier of the chapter being parsed.
|
163
163
|
:return: Cleaned chapter content as plain text or minimal HTML.
|
164
164
|
"""
|
165
|
-
if not
|
165
|
+
if not html_list:
|
166
166
|
return None
|
167
|
-
tree =
|
167
|
+
tree = html.fromstring(html_list[0])
|
168
168
|
|
169
169
|
content_lines = tree.xpath(self._CHAPTER_CONTENT_XPATH)
|
170
170
|
content = "\n\n".join(line.strip() for line in content_lines if line.strip())
|
novel_downloader/locales/en.json
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
"help_config": "Path to config file",
|
4
4
|
"help_download": "Download novels",
|
5
5
|
"help_clean": "Clean cache and configuration files",
|
6
|
-
"
|
6
|
+
"help_export": "Export downloaded novels",
|
7
7
|
|
8
8
|
"settings_help": "Configure downloader settings.",
|
9
9
|
"settings_set_lang_help": "Switch language between Chinese and English.",
|
@@ -65,6 +65,15 @@
|
|
65
65
|
"download_edit_config": "Please edit your config and replace them with real book IDs.",
|
66
66
|
"download_downloading": "Downloading book {book_id} from {site}...",
|
67
67
|
"download_prompt_parse": "Parse...",
|
68
|
+
"download_book_ids": "One or more book IDs to process",
|
69
|
+
"login_description": "Description",
|
70
|
+
"login_hint": "Hint",
|
71
|
+
"login_manual_prompt": ">> Please complete login in your browser and press Enter to continue...",
|
72
|
+
"login_use_config": "Using value from config.",
|
73
|
+
"login_enter_password": "Enter password: ",
|
74
|
+
"login_enter_cookie": "Enter cookies: ",
|
75
|
+
"login_enter_value": "Enter value: ",
|
76
|
+
"login_required_field": "This field is required. Please enter a value.",
|
68
77
|
|
69
78
|
"login_prompt_intro": "Manual login is required. Please switch to the browser and log in.",
|
70
79
|
"login_prompt_press_enter": "Attempt {attempt}/{max_retries}: Press Enter after completing login in the browser...",
|
@@ -96,5 +105,12 @@
|
|
96
105
|
"clean_nothing": "No clean option specified",
|
97
106
|
"clean_deleted": "Deleted",
|
98
107
|
"clean_not_found": "Not found",
|
99
|
-
"clean_cancelled": "Clean operation cancelled"
|
108
|
+
"clean_cancelled": "Clean operation cancelled",
|
109
|
+
|
110
|
+
"export_format_help": "Export format: txt, epub, or all",
|
111
|
+
"export_processing": "Exporting book {book_id} in format: {format}",
|
112
|
+
"export_success_txt": "Successfully exported {book_id} as TXT.",
|
113
|
+
"export_failed_txt": "Failed to export {book_id} as TXT: {err}",
|
114
|
+
"export_success_epub": "Successfully exported {book_id} as EPUB.",
|
115
|
+
"export_failed_epub": "Failed to export {book_id} as EPUB: {err}"
|
100
116
|
}
|
novel_downloader/locales/zh.json
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
"help_config": "配置文件路径",
|
4
4
|
"help_download": "下载小说",
|
5
5
|
"help_clean": "清理缓存和配置文件",
|
6
|
-
"
|
6
|
+
"help_export": "导出已下载的小说",
|
7
7
|
|
8
8
|
"settings_help": "配置下载器设置",
|
9
9
|
"settings_set_lang_help": "在中文和英文之间切换语言",
|
@@ -65,6 +65,15 @@
|
|
65
65
|
"download_edit_config": "请编辑配置并将示例 ID 替换为真实书籍 ID",
|
66
66
|
"download_downloading": "正在从 {site} 下载书籍 {book_id}...",
|
67
67
|
"download_prompt_parse": "结束...",
|
68
|
+
"download_book_ids": "要处理的一个或多个小说 ID",
|
69
|
+
"login_description": "说明",
|
70
|
+
"login_hint": "提示",
|
71
|
+
"login_manual_prompt": ">> 请在浏览器中完成登录后按回车继续...",
|
72
|
+
"login_use_config": "使用配置中的默认值",
|
73
|
+
"login_enter_password": "请输入密码: ",
|
74
|
+
"login_enter_cookie": "请输入 Cookie: ",
|
75
|
+
"login_enter_value": "请输入值: ",
|
76
|
+
"login_required_field": "该字段是必填项, 请重新输入",
|
68
77
|
|
69
78
|
"login_prompt_intro": "需要手动登录, 请切换到浏览器窗口完成登录",
|
70
79
|
"login_prompt_press_enter": "第 {attempt}/{max_retries} 次尝试: 请在浏览器中完成登录后按回车键...",
|
@@ -96,5 +105,12 @@
|
|
96
105
|
"clean_nothing": "未指定任何要清理的内容",
|
97
106
|
"clean_deleted": "已删除",
|
98
107
|
"clean_not_found": "未找到",
|
99
|
-
"clean_cancelled": "已取消清理操作"
|
108
|
+
"clean_cancelled": "已取消清理操作",
|
109
|
+
|
110
|
+
"export_format_help": "导出格式: txt、epub 或 all",
|
111
|
+
"export_processing": "正在导出小说 {book_id}, 格式: {format}",
|
112
|
+
"export_success_txt": "成功将 {book_id} 导出为 TXT。",
|
113
|
+
"export_failed_txt": "导出 {book_id} 为 TXT 失败: {err}",
|
114
|
+
"export_success_epub": "成功将 {book_id} 导出为 EPUB",
|
115
|
+
"export_failed_epub": "导出 {book_id} 为 EPUB 失败: {err}"
|
100
116
|
}
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models
|
4
|
+
-----------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from .browser import NewContextOptions
|
9
|
+
from .chapter import ChapterDict
|
10
|
+
from .config import (
|
11
|
+
DownloaderConfig,
|
12
|
+
ExporterConfig,
|
13
|
+
FetcherConfig,
|
14
|
+
ParserConfig,
|
15
|
+
)
|
16
|
+
from .login import LoginField
|
17
|
+
from .site_rules import (
|
18
|
+
BookInfoRules,
|
19
|
+
FieldRules,
|
20
|
+
RuleStep,
|
21
|
+
SiteProfile,
|
22
|
+
SiteRules,
|
23
|
+
SiteRulesDict,
|
24
|
+
VolumesRules,
|
25
|
+
)
|
26
|
+
from .tasks import (
|
27
|
+
CidTask,
|
28
|
+
HtmlTask,
|
29
|
+
RestoreTask,
|
30
|
+
)
|
31
|
+
from .types import (
|
32
|
+
BrowserType,
|
33
|
+
LogLevel,
|
34
|
+
ModeType,
|
35
|
+
SaveMode,
|
36
|
+
SplitMode,
|
37
|
+
StorageBackend,
|
38
|
+
)
|
39
|
+
|
40
|
+
__all__ = [
|
41
|
+
"NewContextOptions",
|
42
|
+
"DownloaderConfig",
|
43
|
+
"ParserConfig",
|
44
|
+
"FetcherConfig",
|
45
|
+
"ExporterConfig",
|
46
|
+
"ChapterDict",
|
47
|
+
"LoginField",
|
48
|
+
"BrowserType",
|
49
|
+
"ModeType",
|
50
|
+
"SaveMode",
|
51
|
+
"StorageBackend",
|
52
|
+
"SplitMode",
|
53
|
+
"LogLevel",
|
54
|
+
"BookInfoRules",
|
55
|
+
"FieldRules",
|
56
|
+
"RuleStep",
|
57
|
+
"SiteProfile",
|
58
|
+
"SiteRules",
|
59
|
+
"SiteRulesDict",
|
60
|
+
"VolumesRules",
|
61
|
+
"CidTask",
|
62
|
+
"HtmlTask",
|
63
|
+
"RestoreTask",
|
64
|
+
]
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.browser
|
4
|
+
-------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from pathlib import Path
|
9
|
+
from typing import TypedDict
|
10
|
+
|
11
|
+
from playwright.async_api import ViewportSize
|
12
|
+
|
13
|
+
|
14
|
+
class NewContextOptions(TypedDict, total=False):
|
15
|
+
user_agent: str
|
16
|
+
locale: str
|
17
|
+
storage_state: Path
|
18
|
+
viewport: ViewportSize
|
19
|
+
java_script_enabled: bool
|
20
|
+
ignore_https_errors: bool
|
21
|
+
extra_http_headers: dict[str, str]
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.chapter
|
4
|
+
-------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, TypedDict
|
9
|
+
|
10
|
+
|
11
|
+
class ChapterDict(TypedDict, total=True):
|
12
|
+
"""
|
13
|
+
TypedDict for a novel chapter.
|
14
|
+
|
15
|
+
Fields:
|
16
|
+
id -- Unique chapter identifier
|
17
|
+
title -- Chapter title
|
18
|
+
content -- Chapter text
|
19
|
+
extra -- Arbitrary metadata (e.g. author remarks, timestamps)
|
20
|
+
"""
|
21
|
+
|
22
|
+
id: str
|
23
|
+
title: str
|
24
|
+
content: str
|
25
|
+
extra: dict[str, Any]
|
@@ -0,0 +1,100 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.config
|
4
|
+
------------------------------
|
5
|
+
|
6
|
+
Defines structured configuration models using dataclasses for each
|
7
|
+
major component in the novel_downloader pipeline.
|
8
|
+
|
9
|
+
Each config section corresponds to a specific stage of the pipeline:
|
10
|
+
- FetcherConfig: network settings for session-mode and browser-mode fetchers
|
11
|
+
- DownloaderConfig: chapter download behavior and local raw data paths
|
12
|
+
- ParserConfig: font decoding, cache handling, and debug options
|
13
|
+
- ExporterConfig: output formatting, export formats, and filename templates
|
14
|
+
|
15
|
+
These models are used to map loaded YAML or JSON config data into
|
16
|
+
strongly typed Python objects for safer and cleaner access.
|
17
|
+
"""
|
18
|
+
|
19
|
+
from dataclasses import dataclass
|
20
|
+
|
21
|
+
from .types import (
|
22
|
+
BrowserType,
|
23
|
+
ModeType,
|
24
|
+
SplitMode,
|
25
|
+
StorageBackend,
|
26
|
+
)
|
27
|
+
|
28
|
+
|
29
|
+
@dataclass
|
30
|
+
class FetcherConfig:
|
31
|
+
request_interval: float = 2.0
|
32
|
+
retry_times: int = 3
|
33
|
+
backoff_factor: float = 2.0
|
34
|
+
timeout: float = 30.0
|
35
|
+
headless: bool = False
|
36
|
+
disable_images: bool = False
|
37
|
+
mode: ModeType = "session"
|
38
|
+
max_connections: int = 10
|
39
|
+
max_rps: float | None = None # Maximum requests per second
|
40
|
+
proxy: str | None = None
|
41
|
+
user_agent: str | None = None
|
42
|
+
headers: dict[str, str] | None = None
|
43
|
+
browser_type: BrowserType = "chromium"
|
44
|
+
verify_ssl: bool = True
|
45
|
+
|
46
|
+
|
47
|
+
@dataclass
|
48
|
+
class DownloaderConfig:
|
49
|
+
request_interval: float = 2.0
|
50
|
+
retry_times: int = 3
|
51
|
+
backoff_factor: float = 2.0
|
52
|
+
raw_data_dir: str = "./raw_data"
|
53
|
+
cache_dir: str = "./novel_cache"
|
54
|
+
download_workers: int = 4
|
55
|
+
parser_workers: int = 4
|
56
|
+
skip_existing: bool = True
|
57
|
+
login_required: bool = False
|
58
|
+
save_html: bool = False
|
59
|
+
mode: ModeType = "session"
|
60
|
+
storage_backend: StorageBackend = "json"
|
61
|
+
storage_batch_size: int = 1
|
62
|
+
username: str = ""
|
63
|
+
password: str = ""
|
64
|
+
cookies: str = ""
|
65
|
+
|
66
|
+
|
67
|
+
@dataclass
|
68
|
+
class ParserConfig:
|
69
|
+
cache_dir: str = "./novel_cache"
|
70
|
+
decode_font: bool = False
|
71
|
+
use_freq: bool = False
|
72
|
+
use_ocr: bool = True
|
73
|
+
use_vec: bool = False
|
74
|
+
ocr_version: str = "v1.0"
|
75
|
+
batch_size: int = 32
|
76
|
+
gpu_mem: int = 500
|
77
|
+
gpu_id: int | None = None
|
78
|
+
ocr_weight: float = 0.6
|
79
|
+
vec_weight: float = 0.4
|
80
|
+
save_font_debug: bool = False
|
81
|
+
mode: ModeType = "session"
|
82
|
+
|
83
|
+
|
84
|
+
@dataclass
|
85
|
+
class ExporterConfig:
|
86
|
+
cache_dir: str = "./novel_cache"
|
87
|
+
raw_data_dir: str = "./raw_data"
|
88
|
+
output_dir: str = "./downloads"
|
89
|
+
storage_backend: StorageBackend = "json"
|
90
|
+
clean_text: bool = True
|
91
|
+
make_txt: bool = True
|
92
|
+
make_epub: bool = False
|
93
|
+
make_md: bool = False
|
94
|
+
make_pdf: bool = False
|
95
|
+
append_timestamp: bool = True
|
96
|
+
filename_template: str = "{title}_{author}"
|
97
|
+
include_cover: bool = True
|
98
|
+
include_toc: bool = False
|
99
|
+
include_picture: bool = False
|
100
|
+
split_mode: SplitMode = "book"
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.login
|
4
|
+
-----------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from dataclasses import dataclass
|
9
|
+
from typing import Literal
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
|
13
|
+
class LoginField:
|
14
|
+
name: str
|
15
|
+
label: str
|
16
|
+
type: Literal["text", "password", "cookie", "manual_login"]
|
17
|
+
required: bool
|
18
|
+
default: str = ""
|
19
|
+
placeholder: str = ""
|
20
|
+
description: str = ""
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.site_rules
|
4
|
+
----------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Literal, TypedDict
|
9
|
+
|
10
|
+
|
11
|
+
class RuleStep(TypedDict, total=False):
|
12
|
+
# —— 操作类型 —— #
|
13
|
+
type: Literal[
|
14
|
+
"attr",
|
15
|
+
"select_one",
|
16
|
+
"select",
|
17
|
+
"find",
|
18
|
+
"find_all",
|
19
|
+
"exclude",
|
20
|
+
"regex",
|
21
|
+
"text",
|
22
|
+
"strip",
|
23
|
+
"replace",
|
24
|
+
"split",
|
25
|
+
"join",
|
26
|
+
]
|
27
|
+
|
28
|
+
# —— BeautifulSoup 相关 —— #
|
29
|
+
selector: str | None # CSS 选择器, 用于 select/select_one/exclude
|
30
|
+
name: str | None # 标签名称, 用于 find/find_all
|
31
|
+
attrs: dict[str, Any] | None # 属性过滤, 用于 find/find_all
|
32
|
+
limit: int | None # find_all 的最大匹配数
|
33
|
+
attr: str | None # 从元素获取属性值 (select/select_one/select_all)
|
34
|
+
|
35
|
+
# —— 正则相关 —— #
|
36
|
+
pattern: str | None # 正则表达式
|
37
|
+
flags: int | None # re.I, re.M 等
|
38
|
+
group: int | None # 匹配结果中的第几个分组 (默认 0)
|
39
|
+
template: str | None # 自定义组合, 比如 "$1$2字"
|
40
|
+
|
41
|
+
# —— 文本处理 —— #
|
42
|
+
chars: str | None # strip 要去除的字符集
|
43
|
+
old: str | None # replace 中要被替换的子串
|
44
|
+
new: str | None # replace 中新的子串
|
45
|
+
count: int | None # replace 中的最大替换次数
|
46
|
+
sep: str | None # split/join 的分隔符
|
47
|
+
index: int | None # split/select_all/select 之后取第几个元素
|
48
|
+
|
49
|
+
|
50
|
+
class FieldRules(TypedDict):
|
51
|
+
steps: list[RuleStep]
|
52
|
+
|
53
|
+
|
54
|
+
class ChapterFieldRules(TypedDict):
|
55
|
+
key: str
|
56
|
+
steps: list[RuleStep]
|
57
|
+
|
58
|
+
|
59
|
+
class VolumesRulesOptional(TypedDict, total=False):
|
60
|
+
volume_selector: str # 有卷时选择 volume 块的 selector
|
61
|
+
volume_name_steps: list[RuleStep]
|
62
|
+
volume_mode: str # Optional: "normal" (default) or "mixed"
|
63
|
+
list_selector: str # Optional: If "mixed" mode, parent container selector
|
64
|
+
|
65
|
+
|
66
|
+
class VolumesRules(VolumesRulesOptional):
|
67
|
+
has_volume: bool # 是否存在卷,false=未分卷
|
68
|
+
chapter_selector: str # 选择 chapter 节点的 selector
|
69
|
+
chapter_steps: list[ChapterFieldRules] # 提取章节信息的步骤列表
|
70
|
+
|
71
|
+
|
72
|
+
class BookInfoRules(TypedDict, total=False):
|
73
|
+
book_name: FieldRules
|
74
|
+
author: FieldRules
|
75
|
+
cover_url: FieldRules
|
76
|
+
update_time: FieldRules
|
77
|
+
serial_status: FieldRules
|
78
|
+
word_count: FieldRules
|
79
|
+
summary: FieldRules
|
80
|
+
volumes: VolumesRules
|
81
|
+
|
82
|
+
|
83
|
+
class ChapterRules(TypedDict, total=False):
|
84
|
+
title: FieldRules
|
85
|
+
content: FieldRules
|
86
|
+
|
87
|
+
|
88
|
+
class SiteProfile(TypedDict):
|
89
|
+
book_info_url: str
|
90
|
+
chapter_url: str
|
91
|
+
|
92
|
+
|
93
|
+
class SiteRules(TypedDict):
|
94
|
+
profile: SiteProfile
|
95
|
+
book_info: BookInfoRules
|
96
|
+
chapter: ChapterRules
|
97
|
+
|
98
|
+
|
99
|
+
SiteRulesDict = dict[str, SiteRules]
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.tasks
|
4
|
+
-----------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from dataclasses import dataclass
|
9
|
+
|
10
|
+
|
11
|
+
@dataclass
|
12
|
+
class CidTask:
|
13
|
+
prev_cid: str | None
|
14
|
+
cid: str
|
15
|
+
retry: int = 0
|
16
|
+
vol_idx: int = 0
|
17
|
+
chap_idx: int = 0
|
18
|
+
|
19
|
+
|
20
|
+
@dataclass
|
21
|
+
class RestoreTask:
|
22
|
+
vol_idx: int
|
23
|
+
chap_idx: int
|
24
|
+
prev_cid: str
|
25
|
+
|
26
|
+
|
27
|
+
@dataclass
|
28
|
+
class HtmlTask:
|
29
|
+
cid: str
|
30
|
+
retry: int
|
31
|
+
html_list: list[str]
|
32
|
+
vol_idx: int = 0
|
33
|
+
chap_idx: int = 0
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.types
|
4
|
+
-----------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Literal
|
9
|
+
|
10
|
+
ModeType = Literal["browser", "session"]
|
11
|
+
SaveMode = Literal["overwrite", "skip"]
|
12
|
+
StorageBackend = Literal["json", "sqlite"]
|
13
|
+
SplitMode = Literal["book", "volume"]
|
14
|
+
LogLevel = Literal["DEBUG", "INFO", "WARNING", "ERROR"]
|
15
|
+
BrowserType = Literal["chromium", "firefox", "webkit"]
|