novel-downloader 1.1.1__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/PKG-INFO +27 -7
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/README.md +24 -6
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/__init__.py +1 -1
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/cli/download.py +58 -24
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/config/adapter.py +16 -10
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/config/models.py +10 -5
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader-1.2.1/novel_downloader/core/downloaders/base_async_downloader.py +157 -0
- novel_downloader-1.2.1/novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/downloaders/common_downloader.py +2 -3
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/factory/__init__.py +14 -2
- novel_downloader-1.2.1/novel_downloader/core/factory/downloader_factory.py +149 -0
- novel_downloader-1.2.1/novel_downloader/core/factory/requester_factory.py +106 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/interfaces/__init__.py +4 -0
- novel_downloader-1.2.1/novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
- novel_downloader-1.2.1/novel_downloader/core/interfaces/async_requester_protocol.py +70 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/interfaces/requester_protocol.py +3 -3
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +2 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/session/main_parser.py +2 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/__init__.py +5 -1
- novel_downloader-1.2.1/novel_downloader/core/requesters/base_async_session.py +299 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/base_browser.py +3 -3
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/base_session.py +5 -5
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/common_requester/__init__.py +5 -1
- novel_downloader-1.2.1/novel_downloader/core/requesters/common_requester/common_async_session.py +98 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/common_requester/common_session.py +2 -2
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +3 -3
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/qidian_requester/qidian_session.py +4 -4
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/config/settings.yaml +20 -14
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/crypto_utils.py +4 -4
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/fontocr/ocr_v2.py +6 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader.egg-info/PKG-INFO +27 -7
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader.egg-info/SOURCES.txt +6 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader.egg-info/requires.txt +3 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/pyproject.toml +3 -1
- novel_downloader-1.1.1/novel_downloader/core/factory/downloader_factory.py +0 -62
- novel_downloader-1.1.1/novel_downloader/core/factory/requester_factory.py +0 -62
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/LICENSE +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/cli/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/cli/clean.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/cli/interactive.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/cli/main.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/cli/settings.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/config/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/config/loader.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/config/site_rules.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/downloaders/base_downloader.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/downloaders/qidian_downloader.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/factory/parser_factory.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/factory/saver_factory.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/interfaces/downloader_protocol.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/interfaces/parser_protocol.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/interfaces/saver_protocol.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/base_parser.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/common_parser/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/common_parser/helper.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/common_parser/main_parser.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/browser/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/session/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/shared/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/parsers/qidian_parser/shared/helpers.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/requesters/qidian_requester/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/base_saver.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/common_saver/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/common_saver/common_epub.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/common_saver/common_txt.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/common_saver/main_saver.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/epub_utils/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/epub_utils/css_builder.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/epub_utils/initializer.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/epub_utils/text_to_html.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/epub_utils/volume_intro.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/savers/qidian_saver.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/locales/en.json +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/locales/zh.json +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/config/rules.toml +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/css_styles/main.css +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/css_styles/volume-intro.css +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/images/volume_border.png +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/js_scripts/qidian_decrypt_node.js +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/json/replace_word_map.json +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/resources/text/blacklist.txt +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/cache.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/constants.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/file_utils/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/file_utils/io.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/file_utils/normalize.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/file_utils/sanitize.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/fontocr/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/fontocr/ocr_v1.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/hash_store.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/hash_utils.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/i18n.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/logger.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/model_loader.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/network.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/state.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/text_utils/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/text_utils/chapter_formatting.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/text_utils/diff_display.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/text_utils/font_mapping.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/text_utils/text_cleaning.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/time_utils/__init__.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/time_utils/datetime_utils.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/utils/time_utils/sleep_utils.py +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader.egg-info/dependency_links.txt +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader.egg-info/entry_points.txt +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader.egg-info/top_level.txt +0 -0
- {novel_downloader-1.1.1 → novel_downloader-1.2.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: novel-downloader
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.2.1
|
4
4
|
Summary: A command-line tool for downloading Chinese web novels from Qidian and similar platforms.
|
5
5
|
Author-email: Saudade Z <saudadez217@gmail.com>
|
6
6
|
License: MIT License
|
@@ -66,6 +66,8 @@ Requires-Dist: tinycss2; extra == "font-recovery"
|
|
66
66
|
Requires-Dist: fonttools; extra == "font-recovery"
|
67
67
|
Requires-Dist: pillow; extra == "font-recovery"
|
68
68
|
Requires-Dist: huggingface_hub; extra == "font-recovery"
|
69
|
+
Provides-Extra: async
|
70
|
+
Requires-Dist: aiohttp; extra == "async"
|
69
71
|
Dynamic: license-file
|
70
72
|
|
71
73
|
# novel-downloader
|
@@ -87,19 +89,37 @@ Dynamic: license-file
|
|
87
89
|
|
88
90
|
```bash
|
89
91
|
# 克隆 + 安装
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
#
|
92
|
+
pip install novel-downloader
|
93
|
+
|
94
|
+
# 如需支持字体解密功能 (decode_font), 请使用:
|
95
|
+
# pip install novel-downloader[font-recovery]
|
94
96
|
|
95
|
-
#
|
97
|
+
# 如需启用异步抓取模式 (mode=async), 请使用:
|
98
|
+
# pip install novel-downloader[async]
|
99
|
+
|
100
|
+
# 初始化默认配置 (生成 settings.yaml)
|
96
101
|
novel-cli settings init
|
97
102
|
|
98
|
-
# 编辑 ./settings.yaml 完成 site/book_ids
|
103
|
+
# 编辑 ./settings.yaml 完成 site/book_ids 等
|
104
|
+
# 可查看 docs/4-settings-schema.md
|
105
|
+
|
99
106
|
# 运行下载
|
100
107
|
novel-cli download 123456
|
101
108
|
```
|
102
109
|
|
110
|
+
**从 GitHub 安装 (开发版)**
|
111
|
+
|
112
|
+
如需体验开发中的最新功能, 可通过 GitHub 安装:
|
113
|
+
|
114
|
+
```bash
|
115
|
+
git clone https://github.com/BowenZ217/novel-downloader.git
|
116
|
+
cd novel-downloader
|
117
|
+
pip install .
|
118
|
+
# 或安装带可选功能:
|
119
|
+
# pip install .[font-recovery]
|
120
|
+
# pip install .[async]
|
121
|
+
```
|
122
|
+
|
103
123
|
更多使用方法, 查看 [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/5-usage-examples.md)
|
104
124
|
|
105
125
|
---
|
@@ -17,19 +17,37 @@
|
|
17
17
|
|
18
18
|
```bash
|
19
19
|
# 克隆 + 安装
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
#
|
20
|
+
pip install novel-downloader
|
21
|
+
|
22
|
+
# 如需支持字体解密功能 (decode_font), 请使用:
|
23
|
+
# pip install novel-downloader[font-recovery]
|
24
24
|
|
25
|
-
#
|
25
|
+
# 如需启用异步抓取模式 (mode=async), 请使用:
|
26
|
+
# pip install novel-downloader[async]
|
27
|
+
|
28
|
+
# 初始化默认配置 (生成 settings.yaml)
|
26
29
|
novel-cli settings init
|
27
30
|
|
28
|
-
# 编辑 ./settings.yaml 完成 site/book_ids
|
31
|
+
# 编辑 ./settings.yaml 完成 site/book_ids 等
|
32
|
+
# 可查看 docs/4-settings-schema.md
|
33
|
+
|
29
34
|
# 运行下载
|
30
35
|
novel-cli download 123456
|
31
36
|
```
|
32
37
|
|
38
|
+
**从 GitHub 安装 (开发版)**
|
39
|
+
|
40
|
+
如需体验开发中的最新功能, 可通过 GitHub 安装:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
git clone https://github.com/BowenZ217/novel-downloader.git
|
44
|
+
cd novel-downloader
|
45
|
+
pip install .
|
46
|
+
# 或安装带可选功能:
|
47
|
+
# pip install .[font-recovery]
|
48
|
+
# pip install .[async]
|
49
|
+
```
|
50
|
+
|
33
51
|
更多使用方法, 查看 [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/5-usage-examples.md)
|
34
52
|
|
35
53
|
---
|
@@ -14,11 +14,15 @@ import click
|
|
14
14
|
from click import Context
|
15
15
|
|
16
16
|
from novel_downloader.config import ConfigAdapter, load_config
|
17
|
-
from novel_downloader.core import (
|
18
|
-
|
17
|
+
from novel_downloader.core.factory import (
|
18
|
+
get_async_downloader,
|
19
|
+
get_async_requester,
|
20
|
+
# get_downloader,
|
19
21
|
get_parser,
|
20
|
-
get_requester,
|
22
|
+
# get_requester,
|
21
23
|
get_saver,
|
24
|
+
get_sync_downloader,
|
25
|
+
get_sync_requester,
|
22
26
|
)
|
23
27
|
from novel_downloader.utils.i18n import t
|
24
28
|
from novel_downloader.utils.logger import setup_logging
|
@@ -75,24 +79,54 @@ def download_cli(ctx: Context, book_ids: List[str], site: str) -> None:
|
|
75
79
|
return
|
76
80
|
|
77
81
|
# Initialize the requester, parser, saver, and downloader components
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
82
|
+
if downloader_cfg.mode == "async":
|
83
|
+
import asyncio
|
84
|
+
|
85
|
+
async_requester = get_async_requester(site, requester_cfg)
|
86
|
+
async_parser = get_parser(site, parser_cfg)
|
87
|
+
async_saver = get_saver(site, saver_cfg)
|
88
|
+
setup_logging()
|
89
|
+
async_downloader = get_async_downloader(
|
90
|
+
requester=async_requester,
|
91
|
+
parser=async_parser,
|
92
|
+
saver=async_saver,
|
93
|
+
site=site,
|
94
|
+
config=downloader_cfg,
|
95
|
+
)
|
96
|
+
|
97
|
+
async def async_download_all() -> None:
|
98
|
+
prepare = getattr(async_downloader, "prepare", None)
|
99
|
+
if prepare and asyncio.iscoroutinefunction(prepare):
|
100
|
+
await prepare()
|
101
|
+
|
102
|
+
for book_id in valid_book_ids:
|
103
|
+
click.echo(t("download_downloading", book_id=book_id, site=site))
|
104
|
+
await async_downloader.download_one(book_id)
|
105
|
+
|
106
|
+
if requester_cfg.auto_close:
|
107
|
+
input(t("download_prompt_parse"))
|
108
|
+
await async_requester.shutdown()
|
109
|
+
|
110
|
+
asyncio.run(async_download_all())
|
111
|
+
else:
|
112
|
+
sync_requester = get_sync_requester(site, requester_cfg)
|
113
|
+
sync_parser = get_parser(site, parser_cfg)
|
114
|
+
sync_saver = get_saver(site, saver_cfg)
|
115
|
+
setup_logging()
|
116
|
+
sync_downloader = get_sync_downloader(
|
117
|
+
requester=sync_requester,
|
118
|
+
parser=sync_parser,
|
119
|
+
saver=sync_saver,
|
120
|
+
site=site,
|
121
|
+
config=downloader_cfg,
|
122
|
+
)
|
123
|
+
|
124
|
+
for book_id in book_ids:
|
125
|
+
click.echo(t("download_downloading", book_id=book_id, site=site))
|
126
|
+
sync_downloader.download_one(book_id)
|
127
|
+
|
128
|
+
if requester_cfg.auto_close:
|
129
|
+
input(t("download_prompt_parse"))
|
130
|
+
sync_requester.shutdown()
|
131
|
+
|
132
|
+
return
|
@@ -63,6 +63,7 @@ class ConfigAdapter:
|
|
63
63
|
disable_images=req.get("disable_images", True),
|
64
64
|
mute_audio=req.get("mute_audio", True),
|
65
65
|
mode=site_cfg.get("mode", "session"),
|
66
|
+
max_rps=site_cfg.get("max_rps", None),
|
66
67
|
)
|
67
68
|
|
68
69
|
def get_downloader_config(self) -> DownloaderConfig:
|
@@ -77,7 +78,9 @@ class ConfigAdapter:
|
|
77
78
|
request_interval=gen.get("request_interval", 5),
|
78
79
|
raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
|
79
80
|
cache_dir=gen.get("cache_dir", "./cache"),
|
80
|
-
|
81
|
+
download_workers=gen.get("download_workers", 4),
|
82
|
+
parser_workers=gen.get("parser_workers", 4),
|
83
|
+
use_process_pool=gen.get("use_process_pool", True),
|
81
84
|
skip_existing=gen.get("skip_existing", True),
|
82
85
|
login_required=site_cfg.get("login_required", False),
|
83
86
|
save_html=debug.get("save_html", False),
|
@@ -90,18 +93,21 @@ class ConfigAdapter:
|
|
90
93
|
config["sites"][site] 中读取解析器相关配置, 返回 ParserConfig 实例
|
91
94
|
"""
|
92
95
|
gen = self._config.get("general", {})
|
96
|
+
font_ocr = gen.get("font_ocr", {})
|
93
97
|
site_cfg = self._config.get("sites", {}).get(self._site, {})
|
94
98
|
return ParserConfig(
|
95
99
|
cache_dir=gen.get("cache_dir", "./cache"),
|
96
|
-
decode_font=
|
97
|
-
use_freq=
|
98
|
-
use_ocr=
|
99
|
-
use_vec=
|
100
|
-
ocr_version=
|
101
|
-
save_font_debug=
|
102
|
-
batch_size=
|
103
|
-
|
104
|
-
|
100
|
+
decode_font=font_ocr.get("decode_font", False),
|
101
|
+
use_freq=font_ocr.get("use_freq", False),
|
102
|
+
use_ocr=font_ocr.get("use_ocr", True),
|
103
|
+
use_vec=font_ocr.get("use_vec", False),
|
104
|
+
ocr_version=font_ocr.get("ocr_version", "v1.0"),
|
105
|
+
save_font_debug=font_ocr.get("save_font_debug", False),
|
106
|
+
batch_size=font_ocr.get("batch_size", 32),
|
107
|
+
gpu_mem=font_ocr.get("gpu_mem", 500),
|
108
|
+
gpu_id=font_ocr.get("gpu_id", None),
|
109
|
+
ocr_weight=font_ocr.get("ocr_weight", 0.6),
|
110
|
+
vec_weight=font_ocr.get("vec_weight", 0.4),
|
105
111
|
mode=site_cfg.get("mode", "session"),
|
106
112
|
)
|
107
113
|
|
@@ -24,10 +24,10 @@ from typing import Any, Dict, List, Literal, Optional, TypedDict
|
|
24
24
|
# === Requesters ===
|
25
25
|
@dataclass
|
26
26
|
class RequesterConfig:
|
27
|
-
wait_time:
|
27
|
+
wait_time: float = 5.0
|
28
28
|
retry_times: int = 3
|
29
|
-
retry_interval:
|
30
|
-
timeout:
|
29
|
+
retry_interval: float = 5.0
|
30
|
+
timeout: float = 30.0
|
31
31
|
headless: bool = True
|
32
32
|
user_data_folder: str = ""
|
33
33
|
profile_name: str = ""
|
@@ -35,15 +35,18 @@ class RequesterConfig:
|
|
35
35
|
disable_images: bool = True
|
36
36
|
mute_audio: bool = True
|
37
37
|
mode: str = "session" # browser / session / async
|
38
|
+
max_rps: Optional[float] = None # Maximum requests per second
|
38
39
|
|
39
40
|
|
40
41
|
# === Downloaders ===
|
41
42
|
@dataclass
|
42
43
|
class DownloaderConfig:
|
43
|
-
request_interval:
|
44
|
+
request_interval: float = 5.0
|
44
45
|
raw_data_dir: str = "./raw_data"
|
45
46
|
cache_dir: str = "./novel_cache"
|
46
|
-
|
47
|
+
download_workers: int = 4
|
48
|
+
parser_workers: int = 4
|
49
|
+
use_process_pool: bool = False
|
47
50
|
skip_existing: bool = True
|
48
51
|
login_required: bool = False
|
49
52
|
save_html: bool = False
|
@@ -60,6 +63,8 @@ class ParserConfig:
|
|
60
63
|
use_vec: bool = False
|
61
64
|
ocr_version: str = "v1.0"
|
62
65
|
batch_size: int = 32
|
66
|
+
gpu_mem: int = 500
|
67
|
+
gpu_id: Optional[int] = None
|
63
68
|
ocr_weight: float = 0.6
|
64
69
|
vec_weight: float = 0.4
|
65
70
|
save_font_debug: bool = False
|
{novel_downloader-1.1.1 → novel_downloader-1.2.1}/novel_downloader/core/downloaders/__init__.py
RENAMED
@@ -11,10 +11,12 @@ Each downloader is responsible for orchestrating the full lifecycle
|
|
11
11
|
of retrieving, parsing, and saving novel content for a given source.
|
12
12
|
"""
|
13
13
|
|
14
|
+
from .common_asynb_downloader import CommonAsyncDownloader
|
14
15
|
from .common_downloader import CommonDownloader
|
15
16
|
from .qidian_downloader import QidianDownloader
|
16
17
|
|
17
18
|
__all__ = [
|
19
|
+
"CommonAsyncDownloader",
|
18
20
|
"CommonDownloader",
|
19
21
|
"QidianDownloader",
|
20
22
|
]
|
@@ -0,0 +1,157 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.downloaders.base_async_downloader
|
5
|
+
-------------------------------------------------------
|
6
|
+
|
7
|
+
Defines the abstract base class `BaseAsyncDownloader`, which provides a
|
8
|
+
common interface and reusable logic for all downloader implementations.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import abc
|
12
|
+
import logging
|
13
|
+
from pathlib import Path
|
14
|
+
from typing import List
|
15
|
+
|
16
|
+
from novel_downloader.config import DownloaderConfig
|
17
|
+
from novel_downloader.core.interfaces import (
|
18
|
+
AsyncRequesterProtocol,
|
19
|
+
ParserProtocol,
|
20
|
+
SaverProtocol,
|
21
|
+
)
|
22
|
+
from novel_downloader.core.interfaces.async_downloader_protocol import (
|
23
|
+
AsyncDownloaderProtocol,
|
24
|
+
)
|
25
|
+
|
26
|
+
logger = logging.getLogger(__name__)
|
27
|
+
|
28
|
+
|
29
|
+
class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
    """
    Abstract asynchronous downloader.

    Stores the requester / parser / saver collaborators and the downloader
    configuration, creates the per-site raw-data and cache directories, and
    implements the generic batch flow in :meth:`download`.

    Subclasses must implement :meth:`download_one`, the logic for
    downloading a single book.
    """

    def __init__(
        self,
        requester: AsyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ) -> None:
        """
        Store the collaborators and create the working directories.

        :param requester: Async requester used to fetch pages.
        :param parser: Parser used to turn fetched pages into data.
        :param saver: Saver used to export the downloaded book.
        :param config: Downloader configuration.
        :param site: Site identifier; used as the sub-directory name under
                     ``config.raw_data_dir`` and ``config.cache_dir``.
        """
        self._requester = requester
        self._parser = parser
        self._saver = saver
        self._config = config
        self._site = site

        # Both directories are created eagerly so subclasses can write into
        # them without checking for existence first.
        self._raw_data_dir = Path(config.raw_data_dir) / site
        self._cache_dir = Path(config.cache_dir) / site
        self._raw_data_dir.mkdir(parents=True, exist_ok=True)
        self._cache_dir.mkdir(parents=True, exist_ok=True)

    @property
    def requester(self) -> AsyncRequesterProtocol:
        """The async requester passed to the constructor."""
        return self._requester

    @property
    def parser(self) -> ParserProtocol:
        """The parser passed to the constructor."""
        return self._parser

    @property
    def saver(self) -> SaverProtocol:
        """The saver passed to the constructor."""
        return self._saver

    @property
    def config(self) -> DownloaderConfig:
        """The downloader configuration passed to the constructor."""
        return self._config

    @property
    def raw_data_dir(self) -> Path:
        """``<config.raw_data_dir>/<site>``; created in ``__init__``."""
        return self._raw_data_dir

    @property
    def cache_dir(self) -> Path:
        """``<config.cache_dir>/<site>``; created in ``__init__``."""
        return self._cache_dir

    @property
    def site(self) -> str:
        """The site identifier passed to the constructor."""
        return self._site

    @property
    def save_html(self) -> bool:
        """Shortcut for ``config.save_html``."""
        return self._config.save_html

    @property
    def skip_existing(self) -> bool:
        """Shortcut for ``config.skip_existing``."""
        return self._config.skip_existing

    @property
    def login_required(self) -> bool:
        """Shortcut for ``config.login_required``."""
        return self._config.login_required

    @property
    def request_interval(self) -> float:
        """Shortcut for ``config.request_interval``."""
        return self._config.request_interval

    async def prepare(self) -> None:
        """
        Optional hook awaited once by :meth:`download` before the batch starts.

        The default implementation does nothing. Subclasses can override
        this method to perform pre-download setup.
        """
        return

    async def download(self, book_ids: List[str]) -> None:
        """
        The general batch download process:
          1. Await :meth:`prepare` once
          2. Iterate over all book IDs and await :meth:`download_one` for each

        An exception raised while downloading one book is passed to
        :meth:`_handle_download_exception` and the loop continues with the
        next book. Exceptions raised by :meth:`prepare` are not caught here.

        :param book_ids: A list of book identifiers to download.
        """
        # 1) one-time setup
        await self.prepare()

        # 2) batch download
        total = len(book_ids)
        for idx, book_id in enumerate(book_ids, start=1):
            logger.debug(
                "[%s] Starting download for %r (%s/%s)",
                self.__class__.__name__,
                book_id,
                idx,
                total,
            )
            try:
                await self.download_one(book_id)
            except Exception as e:
                # Best effort: one failing book must not abort the batch.
                self._handle_download_exception(book_id, e)

    @abc.abstractmethod
    async def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Subclasses must implement this method.

        :param book_id: The identifier of the book to download.
        """
        ...

    def _handle_download_exception(self, book_id: str, error: Exception) -> None:
        """
        Handle download errors in a consistent way.

        Logs a one-line warning, plus the full traceback at debug level so
        the failure can still be diagnosed. This method can be overridden
        or extended to implement retry logic, etc.

        :param book_id: The ID of the book that failed.
        :param error: The exception raised during download.
        """
        logger.warning(
            "[%s] Failed to download %r: %s",
            self.__class__.__name__,
            book_id,
            error,
        )
        # The warning above only carries str(error); keep the traceback
        # available without making default-level output noisier.
        logger.debug(
            "[%s] Traceback for %r",
            self.__class__.__name__,
            book_id,
            exc_info=error,
        )
|
@@ -0,0 +1,207 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.downloaders.common_asynb_downloader
|
5
|
+
---------------------------------------------------------
|
6
|
+
|
7
|
+
This module defines `CommonAsyncDownloader`.
|
8
|
+
"""
|
9
|
+
|
10
|
+
import asyncio
|
11
|
+
import json
|
12
|
+
import logging
|
13
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
14
|
+
from typing import Any, Dict, Tuple
|
15
|
+
|
16
|
+
from novel_downloader.config import DownloaderConfig
|
17
|
+
from novel_downloader.core.interfaces import (
|
18
|
+
AsyncRequesterProtocol,
|
19
|
+
ParserProtocol,
|
20
|
+
SaverProtocol,
|
21
|
+
)
|
22
|
+
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
|
23
|
+
from novel_downloader.utils.network import download_image_as_bytes
|
24
|
+
from novel_downloader.utils.time_utils import calculate_time_difference
|
25
|
+
|
26
|
+
from .base_async_downloader import BaseAsyncDownloader
|
27
|
+
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
|
30
|
+
|
31
|
+
class CommonAsyncDownloader(BaseAsyncDownloader):
    """
    Specialized Async downloader for common novels.

    Chapters are fetched concurrently (bounded by ``download_workers``) and
    handed through an in-memory queue to ``parser_workers`` parser
    coroutines, which run the parser and the JSON write in an executor.
    """

    def __init__(
        self,
        requester: AsyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Initialize the base downloader and mark the session as not logged in.

        :param requester: Async requester used to fetch pages.
        :param parser: Parser used for book info and chapter pages.
        :param saver: Saver invoked once all chapters are processed.
        :param config: Downloader configuration.
        :param site: Site identifier.
        """
        super().__init__(requester, parser, saver, config, site)
        self._is_logged_in = False

    async def prepare(self) -> None:
        """
        Perform login if the configuration requires it.

        The login is attempted only while ``_is_logged_in`` is False, so
        repeated calls after a successful login are no-ops.

        :raises RuntimeError: If the requester reports a failed login.
        """
        if self.login_required and not self._is_logged_in:
            success = await self.requester.login(max_retries=3)
            if not success:
                raise RuntimeError("Login failed")
            self._is_logged_in = True

    async def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Steps:
          1. Load ``book_info.json`` from disk, or fetch and parse it again
             when it is missing, unreadable, or considered stale.
          2. Download the cover image if the book info has a ``cover_url``.
          3. Fetch every chapter listed in ``book_info["volumes"]`` and
             parse/save each one to ``chapters/<chapterId>.json``.
          4. Call the saver for the whole book.

        Errors on individual chapters are logged and do not abort the book.

        :param book_id: The identifier of the book to download.
        """
        # NOTE(review): isinstance() against a Protocol only works if it is
        # decorated with @runtime_checkable - confirm in the interfaces module.
        assert isinstance(self.requester, AsyncRequesterProtocol)

        TAG = "[AsyncDownloader]"
        raw_base = self.raw_data_dir / book_id
        cache_base = self.cache_dir / book_id
        info_path = raw_base / "book_info.json"
        chapters_html_dir = cache_base / "html"
        chapter_dir = raw_base / "chapters"

        raw_base.mkdir(parents=True, exist_ok=True)
        chapter_dir.mkdir(parents=True, exist_ok=True)
        if self.save_html:
            chapters_html_dir.mkdir(parents=True, exist_ok=True)

        # load or fetch book_info
        book_info: Dict[str, Any] = {}
        re_fetch = True
        if info_path.exists():
            try:
                # Parsed once and reused below when the cache is still fresh.
                book_info = json.loads(info_path.read_text("utf-8"))
                # The first element of the result is treated as elapsed days.
                days, *_ = calculate_time_difference(
                    book_info.get("update_time", ""), "UTC+8"
                )
                re_fetch = days > 1
            except Exception:
                # Deliberate best effort: any problem with the cached file
                # (bad JSON, unexpected shape, unparsable time) means refetch.
                re_fetch = True

        if re_fetch:
            info_html = await self.requester.get_book_info(
                book_id, self.request_interval
            )
            if self.save_html:
                save_as_txt(info_html, chapters_html_dir / "info.html")
            book_info = self.parser.parse_book_info(info_html)
            # "未找到书名" is presumably the parser's "book name not found"
            # placeholder; such a result is not written to the cache file.
            if book_info.get("book_name") != "未找到书名":
                save_as_json(book_info, info_path)
            else:
                logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)

        loop = asyncio.get_running_loop()

        # download cover (blocking helper, so run it in the default executor)
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            await loop.run_in_executor(
                None, download_image_as_bytes, cover_url, raw_base
            )

        # setup queue, semaphore, executor
        # Worker counts come from user configuration. A value <= 0 would
        # either raise in Semaphore() or leave the queue without consumers
        # and hang queue.join() forever, so clamp both to at least 1.
        semaphore = asyncio.Semaphore(max(1, self.download_workers))
        parser_count = max(1, self.parser_workers)
        queue: asyncio.Queue[Tuple[str, str]] = asyncio.Queue()
        # NOTE(review): with a process pool the parser and saver bound
        # methods are sent to worker processes, so both objects must be
        # picklable - confirm for every parser/saver implementation.
        executor = (
            ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
        )

        async def parser_worker(worker_id: int) -> None:
            # Runs until cancelled: take one fetched page, parse it, save it.
            while True:
                cid, html = await queue.get()
                try:
                    chap_json = await loop.run_in_executor(
                        executor, self.parser.parse_chapter, html, cid
                    )
                    if chap_json:
                        await loop.run_in_executor(
                            executor,
                            save_as_json,
                            chap_json,
                            chapter_dir / f"{cid}.json",
                        )
                        logger.info(
                            "%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
                        )
                except Exception as e:
                    logger.error(
                        "%s [Parser-%d] error on chapter %s: %s", TAG, worker_id, cid, e
                    )
                finally:
                    # Always balance queue.get() so queue.join() can return.
                    queue.task_done()

        async def download_worker(chap: Dict[str, Any]) -> None:
            # Fetch one chapter page and enqueue it for parsing.
            cid = str(chap.get("chapterId") or "")
            if not cid:
                return
            target = chapter_dir / f"{cid}.json"
            if target.exists() and self.skip_existing:
                logger.info("%s skipping existing chapter %s", TAG, cid)
                return

            try:
                async with semaphore:
                    html = await self.requester.get_book_chapter(
                        book_id, cid, self.request_interval
                    )
                    if self.save_html:
                        await loop.run_in_executor(
                            executor,
                            save_as_txt,
                            html,
                            chapters_html_dir / f"{cid}.html",
                        )
                    await queue.put((cid, html))
                    logger.info("%s downloaded chapter %s", TAG, cid)
            except Exception as e:
                logger.error("%s error downloading %s: %s", TAG, cid, e)

        try:
            # start parser workers
            parsers = [
                asyncio.create_task(parser_worker(i)) for i in range(parser_count)
            ]
            try:
                # enqueue + run downloads
                download_tasks = [
                    asyncio.create_task(download_worker(chap))
                    for vol in book_info.get("volumes", [])
                    for chap in vol.get("chapters", [])
                ]
                await asyncio.gather(*download_tasks)
                await queue.join()  # wait until all parsed
            finally:
                # stop parser loops, also on error/cancellation, and wait
                # for them so no cancelled task is left pending
                for p in parsers:
                    p.cancel()
                await asyncio.gather(*parsers, return_exceptions=True)

            # final save
            await loop.run_in_executor(executor, self.saver.save, book_id)
        finally:
            # Release the pool's threads/processes even when a step failed.
            executor.shutdown(wait=True)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return

    @property
    def parser_workers(self) -> int:
        """Shortcut for ``config.parser_workers``."""
        return self.config.parser_workers

    @property
    def download_workers(self) -> int:
        """Shortcut for ``config.download_workers``."""
        return self.config.download_workers

    @property
    def use_process_pool(self) -> bool:
        """Shortcut for ``config.use_process_pool``."""
        return self.config.use_process_pool
|