novel-downloader 1.1.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/PKG-INFO +27 -7
  2. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/README.md +24 -6
  3. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/__init__.py +1 -1
  4. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/cli/download.py +58 -24
  5. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/config/adapter.py +4 -1
  6. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/config/models.py +4 -1
  7. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/downloaders/__init__.py +2 -0
  8. novel_downloader-1.2.0/novel_downloader/core/downloaders/base_async_downloader.py +157 -0
  9. novel_downloader-1.2.0/novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
  10. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/downloaders/common_downloader.py +2 -3
  11. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/factory/__init__.py +14 -2
  12. novel_downloader-1.2.0/novel_downloader/core/factory/downloader_factory.py +149 -0
  13. novel_downloader-1.2.0/novel_downloader/core/factory/requester_factory.py +106 -0
  14. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/interfaces/__init__.py +4 -0
  15. novel_downloader-1.2.0/novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
  16. novel_downloader-1.2.0/novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
  17. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/__init__.py +5 -1
  18. novel_downloader-1.2.0/novel_downloader/core/requesters/base_async_session.py +297 -0
  19. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/common_requester/__init__.py +5 -1
  20. novel_downloader-1.2.0/novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
  21. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/qidian_requester/qidian_session.py +1 -1
  22. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/config/settings.yaml +4 -1
  23. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/crypto_utils.py +4 -4
  24. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader.egg-info/PKG-INFO +27 -7
  25. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader.egg-info/SOURCES.txt +6 -0
  26. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader.egg-info/requires.txt +3 -0
  27. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/pyproject.toml +3 -1
  28. novel_downloader-1.1.1/novel_downloader/core/factory/downloader_factory.py +0 -62
  29. novel_downloader-1.1.1/novel_downloader/core/factory/requester_factory.py +0 -62
  30. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/LICENSE +0 -0
  31. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/cli/__init__.py +0 -0
  32. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/cli/clean.py +0 -0
  33. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/cli/interactive.py +0 -0
  34. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/cli/main.py +0 -0
  35. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/cli/settings.py +0 -0
  36. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/config/__init__.py +0 -0
  37. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/config/loader.py +0 -0
  38. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/config/site_rules.py +0 -0
  39. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/__init__.py +0 -0
  40. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/downloaders/base_downloader.py +0 -0
  41. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/downloaders/qidian_downloader.py +0 -0
  42. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/factory/parser_factory.py +0 -0
  43. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/factory/saver_factory.py +0 -0
  44. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/interfaces/downloader_protocol.py +0 -0
  45. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/interfaces/parser_protocol.py +0 -0
  46. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/interfaces/requester_protocol.py +0 -0
  47. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/interfaces/saver_protocol.py +0 -0
  48. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/__init__.py +0 -0
  49. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/base_parser.py +0 -0
  50. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/common_parser/__init__.py +0 -0
  51. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/common_parser/helper.py +0 -0
  52. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/common_parser/main_parser.py +0 -0
  53. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/__init__.py +0 -0
  54. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/browser/__init__.py +0 -0
  55. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +0 -0
  56. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +0 -0
  57. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +0 -0
  58. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +0 -0
  59. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/session/__init__.py +0 -0
  60. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +0 -0
  61. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +0 -0
  62. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +0 -0
  63. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/session/main_parser.py +0 -0
  64. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +0 -0
  65. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/shared/__init__.py +0 -0
  66. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +0 -0
  67. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/parsers/qidian_parser/shared/helpers.py +0 -0
  68. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/base_browser.py +0 -0
  69. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/base_session.py +0 -0
  70. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/common_requester/common_session.py +0 -0
  71. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/qidian_requester/__init__.py +0 -0
  72. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +0 -0
  73. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/__init__.py +0 -0
  74. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/base_saver.py +0 -0
  75. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/common_saver/__init__.py +0 -0
  76. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/common_saver/common_epub.py +0 -0
  77. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/common_saver/common_txt.py +0 -0
  78. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/common_saver/main_saver.py +0 -0
  79. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/epub_utils/__init__.py +0 -0
  80. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/epub_utils/css_builder.py +0 -0
  81. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/epub_utils/initializer.py +0 -0
  82. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/epub_utils/text_to_html.py +0 -0
  83. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/epub_utils/volume_intro.py +0 -0
  84. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/core/savers/qidian_saver.py +0 -0
  85. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/locales/en.json +0 -0
  86. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/locales/zh.json +0 -0
  87. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/config/rules.toml +0 -0
  88. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/css_styles/main.css +0 -0
  89. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/css_styles/volume-intro.css +0 -0
  90. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/images/volume_border.png +0 -0
  91. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/js_scripts/qidian_decrypt_node.js +0 -0
  92. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/json/replace_word_map.json +0 -0
  93. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/resources/text/blacklist.txt +0 -0
  94. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/__init__.py +0 -0
  95. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/cache.py +0 -0
  96. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/constants.py +0 -0
  97. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/file_utils/__init__.py +0 -0
  98. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/file_utils/io.py +0 -0
  99. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/file_utils/normalize.py +0 -0
  100. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/file_utils/sanitize.py +0 -0
  101. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/fontocr/__init__.py +0 -0
  102. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/fontocr/ocr_v1.py +0 -0
  103. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/fontocr/ocr_v2.py +0 -0
  104. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/hash_store.py +0 -0
  105. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/hash_utils.py +0 -0
  106. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/i18n.py +0 -0
  107. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/logger.py +0 -0
  108. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/model_loader.py +0 -0
  109. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/network.py +0 -0
  110. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/state.py +0 -0
  111. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/text_utils/__init__.py +0 -0
  112. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/text_utils/chapter_formatting.py +0 -0
  113. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/text_utils/diff_display.py +0 -0
  114. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/text_utils/font_mapping.py +0 -0
  115. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/text_utils/text_cleaning.py +0 -0
  116. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/time_utils/__init__.py +0 -0
  117. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/time_utils/datetime_utils.py +0 -0
  118. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader/utils/time_utils/sleep_utils.py +0 -0
  119. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader.egg-info/dependency_links.txt +0 -0
  120. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader.egg-info/entry_points.txt +0 -0
  121. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/novel_downloader.egg-info/top_level.txt +0 -0
  122. {novel_downloader-1.1.1 → novel_downloader-1.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: novel-downloader
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: A command-line tool for downloading Chinese web novels from Qidian and similar platforms.
5
5
  Author-email: Saudade Z <saudadez217@gmail.com>
6
6
  License: MIT License
@@ -66,6 +66,8 @@ Requires-Dist: tinycss2; extra == "font-recovery"
66
66
  Requires-Dist: fonttools; extra == "font-recovery"
67
67
  Requires-Dist: pillow; extra == "font-recovery"
68
68
  Requires-Dist: huggingface_hub; extra == "font-recovery"
69
+ Provides-Extra: async
70
+ Requires-Dist: aiohttp; extra == "async"
69
71
  Dynamic: license-file
70
72
 
71
73
  # novel-downloader
@@ -87,19 +89,37 @@ Dynamic: license-file
87
89
 
88
90
  ```bash
89
91
  # 克隆 + 安装
90
- git clone https://github.com/BowenZ217/novel-downloader.git
91
- cd novel-downloader
92
- pip install .
93
- # pip install .[font-recovery]
92
+ pip install novel-downloader
93
+
94
+ # 如需支持字体解密功能 (decode_font), 请使用:
95
+ # pip install novel-downloader[font-recovery]
94
96
 
95
- # 初始化默认配置
97
+ # 如需启用异步抓取模式 (mode=async), 请使用:
98
+ # pip install novel-downloader[async]
99
+
100
+ # 初始化默认配置 (生成 settings.yaml)
96
101
  novel-cli settings init
97
102
 
98
- # 编辑 ./settings.yaml 完成 site/book_ids 等, 可查看 docs/4-settings-schema.md
103
+ # 编辑 ./settings.yaml 完成 site/book_ids
104
+ # 可查看 docs/4-settings-schema.md
105
+
99
106
  # 运行下载
100
107
  novel-cli download 123456
101
108
  ```
102
109
 
110
+ **从 GitHub 安装 (开发版)**
111
+
112
+ 如需体验开发中的最新功能, 可通过 GitHub 安装:
113
+
114
+ ```bash
115
+ git clone https://github.com/BowenZ217/novel-downloader.git
116
+ cd novel-downloader
117
+ pip install .
118
+ # 或安装带可选功能:
119
+ # pip install .[font-recovery]
120
+ # pip install .[async]
121
+ ```
122
+
103
123
  更多使用方法, 查看 [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/5-usage-examples.md)
104
124
 
105
125
  ---
@@ -17,19 +17,37 @@
17
17
 
18
18
  ```bash
19
19
  # 克隆 + 安装
20
- git clone https://github.com/BowenZ217/novel-downloader.git
21
- cd novel-downloader
22
- pip install .
23
- # pip install .[font-recovery]
20
+ pip install novel-downloader
21
+
22
+ # 如需支持字体解密功能 (decode_font), 请使用:
23
+ # pip install novel-downloader[font-recovery]
24
24
 
25
- # 初始化默认配置
25
+ # 如需启用异步抓取模式 (mode=async), 请使用:
26
+ # pip install novel-downloader[async]
27
+
28
+ # 初始化默认配置 (生成 settings.yaml)
26
29
  novel-cli settings init
27
30
 
28
- # 编辑 ./settings.yaml 完成 site/book_ids 等, 可查看 docs/4-settings-schema.md
31
+ # 编辑 ./settings.yaml 完成 site/book_ids
32
+ # 可查看 docs/4-settings-schema.md
33
+
29
34
  # 运行下载
30
35
  novel-cli download 123456
31
36
  ```
32
37
 
38
+ **从 GitHub 安装 (开发版)**
39
+
40
+ 如需体验开发中的最新功能, 可通过 GitHub 安装:
41
+
42
+ ```bash
43
+ git clone https://github.com/BowenZ217/novel-downloader.git
44
+ cd novel-downloader
45
+ pip install .
46
+ # 或安装带可选功能:
47
+ # pip install .[font-recovery]
48
+ # pip install .[async]
49
+ ```
50
+
33
51
  更多使用方法, 查看 [使用示例](https://github.com/BowenZ217/novel-downloader/blob/main/docs/5-usage-examples.md)
34
52
 
35
53
  ---
@@ -7,7 +7,7 @@ novel_downloader
7
7
  Core package for the Novel Downloader project.
8
8
  """
9
9
 
10
- __version__ = "1.1.1"
10
+ __version__ = "1.2.0"
11
11
 
12
12
  __author__ = "Saudade Z"
13
13
  __email__ = "saudadez217@gmail.com"
@@ -14,11 +14,15 @@ import click
14
14
  from click import Context
15
15
 
16
16
  from novel_downloader.config import ConfigAdapter, load_config
17
- from novel_downloader.core import (
18
- get_downloader,
17
+ from novel_downloader.core.factory import (
18
+ get_async_downloader,
19
+ get_async_requester,
20
+ # get_downloader,
19
21
  get_parser,
20
- get_requester,
22
+ # get_requester,
21
23
  get_saver,
24
+ get_sync_downloader,
25
+ get_sync_requester,
22
26
  )
23
27
  from novel_downloader.utils.i18n import t
24
28
  from novel_downloader.utils.logger import setup_logging
@@ -75,24 +79,54 @@ def download_cli(ctx: Context, book_ids: List[str], site: str) -> None:
75
79
  return
76
80
 
77
81
  # Initialize the requester, parser, saver, and downloader components
78
- curr_requester = get_requester(site, requester_cfg)
79
- curr_parser = get_parser(site, parser_cfg)
80
- curr_saver = get_saver(site, saver_cfg)
81
- setup_logging()
82
- curr_downloader = get_downloader(
83
- requester=curr_requester,
84
- parser=curr_parser,
85
- saver=curr_saver,
86
- site=site,
87
- config=downloader_cfg,
88
- )
89
-
90
- # Perform the download for each valid book ID
91
- for book_id in book_ids:
92
- click.echo(t("download_downloading", book_id=book_id, site=site))
93
- curr_downloader.download_one(book_id)
94
-
95
- # Prompt for parsing and wait for user input before shutting down
96
- if requester_cfg.auto_close:
97
- input(t("download_prompt_parse"))
98
- curr_requester.shutdown()
82
+ if downloader_cfg.mode == "async":
83
+ import asyncio
84
+
85
+ async_requester = get_async_requester(site, requester_cfg)
86
+ async_parser = get_parser(site, parser_cfg)
87
+ async_saver = get_saver(site, saver_cfg)
88
+ setup_logging()
89
+ async_downloader = get_async_downloader(
90
+ requester=async_requester,
91
+ parser=async_parser,
92
+ saver=async_saver,
93
+ site=site,
94
+ config=downloader_cfg,
95
+ )
96
+
97
+ async def async_download_all() -> None:
98
+ prepare = getattr(async_downloader, "prepare", None)
99
+ if prepare and asyncio.iscoroutinefunction(prepare):
100
+ await prepare()
101
+
102
+ for book_id in valid_book_ids:
103
+ click.echo(t("download_downloading", book_id=book_id, site=site))
104
+ await async_downloader.download_one(book_id)
105
+
106
+ if requester_cfg.auto_close:
107
+ input(t("download_prompt_parse"))
108
+ await async_requester.shutdown()
109
+
110
+ asyncio.run(async_download_all())
111
+ else:
112
+ sync_requester = get_sync_requester(site, requester_cfg)
113
+ sync_parser = get_parser(site, parser_cfg)
114
+ sync_saver = get_saver(site, saver_cfg)
115
+ setup_logging()
116
+ sync_downloader = get_sync_downloader(
117
+ requester=sync_requester,
118
+ parser=sync_parser,
119
+ saver=sync_saver,
120
+ site=site,
121
+ config=downloader_cfg,
122
+ )
123
+
124
+ for book_id in book_ids:
125
+ click.echo(t("download_downloading", book_id=book_id, site=site))
126
+ sync_downloader.download_one(book_id)
127
+
128
+ if requester_cfg.auto_close:
129
+ input(t("download_prompt_parse"))
130
+ sync_requester.shutdown()
131
+
132
+ return
@@ -63,6 +63,7 @@ class ConfigAdapter:
63
63
  disable_images=req.get("disable_images", True),
64
64
  mute_audio=req.get("mute_audio", True),
65
65
  mode=site_cfg.get("mode", "session"),
66
+ max_rps=site_cfg.get("max_rps", None),
66
67
  )
67
68
 
68
69
  def get_downloader_config(self) -> DownloaderConfig:
@@ -77,7 +78,9 @@ class ConfigAdapter:
77
78
  request_interval=gen.get("request_interval", 5),
78
79
  raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
79
80
  cache_dir=gen.get("cache_dir", "./cache"),
80
- max_threads=gen.get("max_threads", 4),
81
+ download_workers=gen.get("download_workers", 4),
82
+ parser_workers=gen.get("parser_workers", 4),
83
+ use_process_pool=gen.get("use_process_pool", True),
81
84
  skip_existing=gen.get("skip_existing", True),
82
85
  login_required=site_cfg.get("login_required", False),
83
86
  save_html=debug.get("save_html", False),
@@ -35,6 +35,7 @@ class RequesterConfig:
35
35
  disable_images: bool = True
36
36
  mute_audio: bool = True
37
37
  mode: str = "session" # browser / session / async
38
+ max_rps: Optional[float] = None # Maximum requests per second
38
39
 
39
40
 
40
41
  # === Downloaders ===
@@ -43,7 +44,9 @@ class DownloaderConfig:
43
44
  request_interval: int = 5
44
45
  raw_data_dir: str = "./raw_data"
45
46
  cache_dir: str = "./novel_cache"
46
- max_threads: int = 4
47
+ download_workers: int = 4
48
+ parser_workers: int = 4
49
+ use_process_pool: bool = False
47
50
  skip_existing: bool = True
48
51
  login_required: bool = False
49
52
  save_html: bool = False
@@ -11,10 +11,12 @@ Each downloader is responsible for orchestrating the full lifecycle
11
11
  of retrieving, parsing, and saving novel content for a given source.
12
12
  """
13
13
 
14
+ from .common_asynb_downloader import CommonAsyncDownloader
14
15
  from .common_downloader import CommonDownloader
15
16
  from .qidian_downloader import QidianDownloader
16
17
 
17
18
  __all__ = [
19
+ "CommonAsyncDownloader",
18
20
  "CommonDownloader",
19
21
  "QidianDownloader",
20
22
  ]
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.downloaders.base_async_downloader
5
+ -------------------------------------------------------
6
+
7
+ Defines the abstract base class `BaseAsyncDownloader`, which provides a
8
+ common interface and reusable logic for all downloader implementations.
9
+ """
10
+
11
+ import abc
12
+ import logging
13
+ from pathlib import Path
14
+ from typing import List
15
+
16
+ from novel_downloader.config import DownloaderConfig
17
+ from novel_downloader.core.interfaces import (
18
+ AsyncRequesterProtocol,
19
+ ParserProtocol,
20
+ SaverProtocol,
21
+ )
22
+ from novel_downloader.core.interfaces.async_downloader_protocol import (
23
+ AsyncDownloaderProtocol,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
30
+ """
31
+ Abstract downloader that defines the initialization interface
32
+ and the general batch download flow.
33
+
34
+ Subclasses must implement the logic for downloading a single book.
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ requester: AsyncRequesterProtocol,
40
+ parser: ParserProtocol,
41
+ saver: SaverProtocol,
42
+ config: DownloaderConfig,
43
+ site: str,
44
+ ):
45
+ self._requester = requester
46
+ self._parser = parser
47
+ self._saver = saver
48
+ self._config = config
49
+ self._site = site
50
+
51
+ self._raw_data_dir = Path(config.raw_data_dir) / site
52
+ self._cache_dir = Path(config.cache_dir) / site
53
+ self._raw_data_dir.mkdir(parents=True, exist_ok=True)
54
+ self._cache_dir.mkdir(parents=True, exist_ok=True)
55
+
56
+ @property
57
+ def requester(self) -> AsyncRequesterProtocol:
58
+ return self._requester
59
+
60
+ @property
61
+ def parser(self) -> ParserProtocol:
62
+ return self._parser
63
+
64
+ @property
65
+ def saver(self) -> SaverProtocol:
66
+ return self._saver
67
+
68
+ @property
69
+ def config(self) -> DownloaderConfig:
70
+ return self._config
71
+
72
+ @property
73
+ def raw_data_dir(self) -> Path:
74
+ return self._raw_data_dir
75
+
76
+ @property
77
+ def cache_dir(self) -> Path:
78
+ return self._cache_dir
79
+
80
+ @property
81
+ def site(self) -> str:
82
+ return self._site
83
+
84
+ @property
85
+ def save_html(self) -> bool:
86
+ return self._config.save_html
87
+
88
+ @property
89
+ def skip_existing(self) -> bool:
90
+ return self._config.skip_existing
91
+
92
+ @property
93
+ def login_required(self) -> bool:
94
+ return self._config.login_required
95
+
96
+ @property
97
+ def request_interval(self) -> int:
98
+ return self._config.request_interval
99
+
100
+ async def prepare(self) -> None:
101
+ """
102
+ Optional hook awaited once by `download()` before any book is downloaded.
103
+
104
+ Subclasses can override this method to perform pre-download setup.
105
+ """
106
+ return
107
+
108
+ async def download(self, book_ids: List[str]) -> None:
109
+ """
110
+ The general batch download process:
111
+ 1. Iterate over all book IDs
112
+ 2. For each ID, call `download_one()`
113
+
114
+ :param book_ids: A list of book identifiers to download.
115
+ """
116
+ await self.prepare()
117
+
118
+ # batch download: process each book ID in order, logging failures
119
+ for idx, book_id in enumerate(book_ids, start=1):
120
+ logger.debug(
121
+ "[%s] Starting download for %r (%s/%s)",
122
+ self.__class__.__name__,
123
+ book_id,
124
+ idx,
125
+ len(book_ids),
126
+ )
127
+ try:
128
+ await self.download_one(book_id)
129
+ except Exception as e:
130
+ self._handle_download_exception(book_id, e)
131
+
132
+ @abc.abstractmethod
133
+ async def download_one(self, book_id: str) -> None:
134
+ """
135
+ The full download logic for a single book.
136
+
137
+ Subclasses must implement this method.
138
+
139
+ :param book_id: The identifier of the book to download.
140
+ """
141
+ ...
142
+
143
+ def _handle_download_exception(self, book_id: str, error: Exception) -> None:
144
+ """
145
+ Handle download errors in a consistent way.
146
+
147
+ This method can be overridden or extended to implement retry logic, etc.
148
+
149
+ :param book_id: The ID of the book that failed.
150
+ :param error: The exception raised during download.
151
+ """
152
+ logger.warning(
153
+ "[%s] Failed to download %r: %s",
154
+ self.__class__.__name__,
155
+ book_id,
156
+ error,
157
+ )
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.downloaders.common_asynb_downloader
5
+ ---------------------------------------------------------
6
+
7
+ This module defines `CommonAsyncDownloader`.
8
+ """
9
+
10
+ import asyncio
11
+ import json
12
+ import logging
13
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
14
+ from typing import Any, Dict, Tuple
15
+
16
+ from novel_downloader.config import DownloaderConfig
17
+ from novel_downloader.core.interfaces import (
18
+ AsyncRequesterProtocol,
19
+ ParserProtocol,
20
+ SaverProtocol,
21
+ )
22
+ from novel_downloader.utils.file_utils import save_as_json, save_as_txt
23
+ from novel_downloader.utils.network import download_image_as_bytes
24
+ from novel_downloader.utils.time_utils import calculate_time_difference
25
+
26
+ from .base_async_downloader import BaseAsyncDownloader
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ class CommonAsyncDownloader(BaseAsyncDownloader):
32
+ """
33
+ Specialized Async downloader for common novels.
34
+ """
35
+
36
+ def __init__(
37
+ self,
38
+ requester: AsyncRequesterProtocol,
39
+ parser: ParserProtocol,
40
+ saver: SaverProtocol,
41
+ config: DownloaderConfig,
42
+ site: str,
43
+ ):
44
+ """ """
45
+ super().__init__(requester, parser, saver, config, site)
46
+ self._is_logged_in = False
47
+
48
+ async def prepare(self) -> None:
49
+ """
50
+ Log in through the requester when login is required and not already done.
51
+ """
52
+ if self.login_required and not self._is_logged_in:
53
+ success = await self.requester.login(max_retries=3)
54
+ if not success:
55
+ raise RuntimeError("Login failed")
56
+ self._is_logged_in = True
57
+
58
+ async def download_one(self, book_id: str) -> None:
59
+ """
60
+ The full download logic for a single book.
61
+
62
+ :param book_id: The identifier of the book to download.
63
+ """
64
+ assert isinstance(self.requester, AsyncRequesterProtocol)
65
+
66
+ TAG = "[AsyncDownloader]"
67
+ raw_base = self.raw_data_dir / book_id
68
+ cache_base = self.cache_dir / book_id
69
+ info_path = raw_base / "book_info.json"
70
+ chapters_html_dir = cache_base / "html"
71
+ chapter_dir = raw_base / "chapters"
72
+
73
+ raw_base.mkdir(parents=True, exist_ok=True)
74
+ chapter_dir.mkdir(parents=True, exist_ok=True)
75
+ if self.save_html:
76
+ chapters_html_dir.mkdir(parents=True, exist_ok=True)
77
+
78
+ # load or fetch book_info
79
+ book_info: Dict[str, Any]
80
+ re_fetch = True
81
+ if info_path.exists():
82
+ try:
83
+ data = json.loads(info_path.read_text("utf-8"))
84
+ days, *_ = calculate_time_difference(
85
+ data.get("update_time", ""), "UTC+8"
86
+ )
87
+ re_fetch = days > 1
88
+ except Exception:
89
+ re_fetch = True
90
+
91
+ if re_fetch:
92
+ info_html = await self.requester.get_book_info(
93
+ book_id, self.request_interval
94
+ )
95
+ if self.save_html:
96
+ save_as_txt(info_html, chapters_html_dir / "info.html")
97
+ book_info = self.parser.parse_book_info(info_html)
98
+ if book_info.get("book_name") != "未找到书名":
99
+ save_as_json(book_info, info_path)
100
+ else:
101
+ logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)
102
+ else:
103
+ book_info = json.loads(info_path.read_text("utf-8"))
104
+
105
+ # download cover
106
+ cover_url = book_info.get("cover_url", "")
107
+ if cover_url:
108
+ await asyncio.get_running_loop().run_in_executor(
109
+ None, download_image_as_bytes, cover_url, raw_base
110
+ )
111
+
112
+ # setup queue, semaphore, executor
113
+ semaphore = asyncio.Semaphore(self.download_workers)
114
+ queue: asyncio.Queue[Tuple[str, str]] = asyncio.Queue()
115
+ loop = asyncio.get_running_loop()
116
+ executor = (
117
+ ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
118
+ )
119
+
120
+ async def parser_worker(worker_id: int) -> None:
121
+ while True:
122
+ cid, html = await queue.get()
123
+ try:
124
+ chap_json = await loop.run_in_executor(
125
+ executor, self.parser.parse_chapter, html, cid
126
+ )
127
+ if chap_json:
128
+ await loop.run_in_executor(
129
+ executor,
130
+ save_as_json,
131
+ chap_json,
132
+ chapter_dir / f"{cid}.json",
133
+ )
134
+ logger.info(
135
+ "%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
136
+ )
137
+ except Exception as e:
138
+ logger.error(
139
+ "%s [Parser-%d] error on chapter %s: %s", TAG, worker_id, cid, e
140
+ )
141
+ finally:
142
+ queue.task_done()
143
+
144
+ async def download_worker(chap: Dict[str, Any]) -> None:
145
+ cid = str(chap.get("chapterId") or "")
146
+ if not cid:
147
+ return
148
+ target = chapter_dir / f"{cid}.json"
149
+ if target.exists() and self.skip_existing:
150
+ logger.info("%s skipping existing chapter %s", TAG, cid)
151
+ return
152
+
153
+ try:
154
+ async with semaphore:
155
+ html = await self.requester.get_book_chapter(
156
+ book_id, cid, self.request_interval
157
+ )
158
+ if self.save_html:
159
+ await loop.run_in_executor(
160
+ executor,
161
+ save_as_txt,
162
+ html,
163
+ chapters_html_dir / f"{cid}.html",
164
+ )
165
+ await queue.put((cid, html))
166
+ logger.info("%s downloaded chapter %s", TAG, cid)
167
+ except Exception as e:
168
+ logger.error("%s error downloading %s: %s", TAG, cid, e)
169
+
170
+ # start parser workers
171
+ parsers = [
172
+ asyncio.create_task(parser_worker(i)) for i in range(self.parser_workers)
173
+ ]
174
+
175
+ # enqueue + run downloads
176
+ download_tasks = []
177
+ for vol in book_info.get("volumes", []):
178
+ for chap in vol.get("chapters", []):
179
+ download_tasks.append(asyncio.create_task(download_worker(chap)))
180
+
181
+ await asyncio.gather(*download_tasks)
182
+ await queue.join() # wait until all parsed
183
+ for p in parsers:
184
+ p.cancel() # stop parser loops
185
+
186
+ # final save
187
+ await loop.run_in_executor(executor, self.saver.save, book_id)
188
+ executor.shutdown(wait=True)
189
+
190
+ logger.info(
191
+ "%s Novel '%s' download completed.",
192
+ TAG,
193
+ book_info.get("book_name", "unknown"),
194
+ )
195
+ return
196
+
197
+ @property
198
+ def parser_workers(self) -> int:
199
+ return self.config.parser_workers
200
+
201
+ @property
202
+ def download_workers(self) -> int:
203
+ return self.config.download_workers
204
+
205
+ @property
206
+ def use_process_pool(self) -> bool:
207
+ return self.config.use_process_pool
@@ -1,11 +1,10 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- novel_downloader.core.downloaders.qidian_downloader
4
+ novel_downloader.core.downloaders.common_downloader
5
5
  ---------------------------------------------------
6
6
 
7
- This module defines `QidianDownloader`, a platform-specific downloader
8
- implementation for retrieving novels from Qidian (起点中文网).
7
+ This module defines `CommonDownloader`.
9
8
  """
10
9
 
11
10
  import json
@@ -8,14 +8,26 @@ This package provides factory methods for dynamically retrieving components
8
8
  based on runtime parameters such as site name or content type.
9
9
  """
10
10
 
11
- from .downloader_factory import get_downloader
11
+ from .downloader_factory import (
12
+ get_async_downloader,
13
+ get_downloader,
14
+ get_sync_downloader,
15
+ )
12
16
  from .parser_factory import get_parser
13
- from .requester_factory import get_requester
17
+ from .requester_factory import (
18
+ get_async_requester,
19
+ get_requester,
20
+ get_sync_requester,
21
+ )
14
22
  from .saver_factory import get_saver
15
23
 
16
24
  __all__ = [
25
+ "get_async_downloader",
17
26
  "get_downloader",
27
+ "get_sync_downloader",
18
28
  "get_parser",
29
+ "get_async_requester",
19
30
  "get_requester",
31
+ "get_sync_requester",
20
32
  "get_saver",
21
33
  ]