novel-downloader 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. novel_downloader/__init__.py +14 -0
  2. novel_downloader/cli/__init__.py +14 -0
  3. novel_downloader/cli/clean.py +134 -0
  4. novel_downloader/cli/download.py +132 -0
  5. novel_downloader/cli/interactive.py +67 -0
  6. novel_downloader/cli/main.py +45 -0
  7. novel_downloader/cli/settings.py +177 -0
  8. novel_downloader/config/__init__.py +52 -0
  9. novel_downloader/config/adapter.py +153 -0
  10. novel_downloader/config/loader.py +177 -0
  11. novel_downloader/config/models.py +173 -0
  12. novel_downloader/config/site_rules.py +97 -0
  13. novel_downloader/core/__init__.py +25 -0
  14. novel_downloader/core/downloaders/__init__.py +22 -0
  15. novel_downloader/core/downloaders/base_async_downloader.py +157 -0
  16. novel_downloader/core/downloaders/base_downloader.py +187 -0
  17. novel_downloader/core/downloaders/common_asynb_downloader.py +207 -0
  18. novel_downloader/core/downloaders/common_downloader.py +191 -0
  19. novel_downloader/core/downloaders/qidian_downloader.py +208 -0
  20. novel_downloader/core/factory/__init__.py +33 -0
  21. novel_downloader/core/factory/downloader_factory.py +149 -0
  22. novel_downloader/core/factory/parser_factory.py +62 -0
  23. novel_downloader/core/factory/requester_factory.py +106 -0
  24. novel_downloader/core/factory/saver_factory.py +49 -0
  25. novel_downloader/core/interfaces/__init__.py +32 -0
  26. novel_downloader/core/interfaces/async_downloader_protocol.py +37 -0
  27. novel_downloader/core/interfaces/async_requester_protocol.py +68 -0
  28. novel_downloader/core/interfaces/downloader_protocol.py +37 -0
  29. novel_downloader/core/interfaces/parser_protocol.py +40 -0
  30. novel_downloader/core/interfaces/requester_protocol.py +65 -0
  31. novel_downloader/core/interfaces/saver_protocol.py +61 -0
  32. novel_downloader/core/parsers/__init__.py +28 -0
  33. novel_downloader/core/parsers/base_parser.py +96 -0
  34. novel_downloader/core/parsers/common_parser/__init__.py +14 -0
  35. novel_downloader/core/parsers/common_parser/helper.py +321 -0
  36. novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
  37. novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
  38. novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
  39. novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
  40. novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
  41. novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
  42. novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
  43. novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
  44. novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
  45. novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
  46. novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
  47. novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
  48. novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
  49. novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
  50. novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
  51. novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
  52. novel_downloader/core/requesters/__init__.py +31 -0
  53. novel_downloader/core/requesters/base_async_session.py +297 -0
  54. novel_downloader/core/requesters/base_browser.py +210 -0
  55. novel_downloader/core/requesters/base_session.py +243 -0
  56. novel_downloader/core/requesters/common_requester/__init__.py +18 -0
  57. novel_downloader/core/requesters/common_requester/common_async_session.py +96 -0
  58. novel_downloader/core/requesters/common_requester/common_session.py +126 -0
  59. novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
  60. novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
  61. novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
  62. novel_downloader/core/savers/__init__.py +20 -0
  63. novel_downloader/core/savers/base_saver.py +169 -0
  64. novel_downloader/core/savers/common_saver/__init__.py +13 -0
  65. novel_downloader/core/savers/common_saver/common_epub.py +232 -0
  66. novel_downloader/core/savers/common_saver/common_txt.py +176 -0
  67. novel_downloader/core/savers/common_saver/main_saver.py +86 -0
  68. novel_downloader/core/savers/epub_utils/__init__.py +27 -0
  69. novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
  70. novel_downloader/core/savers/epub_utils/initializer.py +98 -0
  71. novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
  72. novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
  73. novel_downloader/core/savers/qidian_saver.py +22 -0
  74. novel_downloader/locales/en.json +91 -0
  75. novel_downloader/locales/zh.json +91 -0
  76. novel_downloader/resources/config/rules.toml +196 -0
  77. novel_downloader/resources/config/settings.yaml +73 -0
  78. novel_downloader/resources/css_styles/main.css +104 -0
  79. novel_downloader/resources/css_styles/volume-intro.css +56 -0
  80. novel_downloader/resources/images/volume_border.png +0 -0
  81. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
  82. novel_downloader/resources/json/replace_word_map.json +4 -0
  83. novel_downloader/resources/text/blacklist.txt +22 -0
  84. novel_downloader/utils/__init__.py +0 -0
  85. novel_downloader/utils/cache.py +24 -0
  86. novel_downloader/utils/constants.py +158 -0
  87. novel_downloader/utils/crypto_utils.py +144 -0
  88. novel_downloader/utils/file_utils/__init__.py +43 -0
  89. novel_downloader/utils/file_utils/io.py +252 -0
  90. novel_downloader/utils/file_utils/normalize.py +68 -0
  91. novel_downloader/utils/file_utils/sanitize.py +77 -0
  92. novel_downloader/utils/fontocr/__init__.py +23 -0
  93. novel_downloader/utils/fontocr/ocr_v1.py +304 -0
  94. novel_downloader/utils/fontocr/ocr_v2.py +658 -0
  95. novel_downloader/utils/hash_store.py +288 -0
  96. novel_downloader/utils/hash_utils.py +103 -0
  97. novel_downloader/utils/i18n.py +41 -0
  98. novel_downloader/utils/logger.py +104 -0
  99. novel_downloader/utils/model_loader.py +72 -0
  100. novel_downloader/utils/network.py +287 -0
  101. novel_downloader/utils/state.py +156 -0
  102. novel_downloader/utils/text_utils/__init__.py +27 -0
  103. novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
  104. novel_downloader/utils/text_utils/diff_display.py +75 -0
  105. novel_downloader/utils/text_utils/font_mapping.py +31 -0
  106. novel_downloader/utils/text_utils/text_cleaning.py +57 -0
  107. novel_downloader/utils/time_utils/__init__.py +22 -0
  108. novel_downloader/utils/time_utils/datetime_utils.py +146 -0
  109. novel_downloader/utils/time_utils/sleep_utils.py +49 -0
  110. novel_downloader-1.1.0.dist-info/METADATA +157 -0
  111. novel_downloader-1.1.0.dist-info/RECORD +115 -0
  112. novel_downloader-1.1.0.dist-info/WHEEL +5 -0
  113. novel_downloader-1.1.0.dist-info/entry_points.txt +2 -0
  114. novel_downloader-1.1.0.dist-info/licenses/LICENSE +21 -0
  115. novel_downloader-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,191 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.downloaders.common_downloader
5
+ ---------------------------------------------------
6
+
7
+ This module defines `CommonDownloader`.
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ from typing import Any, Dict
13
+
14
+ from novel_downloader.config import DownloaderConfig
15
+ from novel_downloader.core.interfaces import (
16
+ ParserProtocol,
17
+ RequesterProtocol,
18
+ SaverProtocol,
19
+ )
20
+ from novel_downloader.utils.file_utils import save_as_json, save_as_txt
21
+ from novel_downloader.utils.network import download_image_as_bytes
22
+ from novel_downloader.utils.time_utils import calculate_time_difference
23
+
24
+ from .base_downloader import BaseDownloader
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class CommonDownloader(BaseDownloader):
    """
    Generic downloader for sites that have no dedicated downloader class.

    The site identifier passed at construction time is used as the
    sub-directory name below the raw-data and cache directories.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Create a downloader bound to a single site.

        :param requester: Object implementing RequesterProtocol; fetches raw pages.
        :param parser: Object implementing ParserProtocol; parses fetched pages.
        :param saver: Object implementing SaverProtocol; writes the final output.
        :param config: Downloader configuration object.
        :param site: Identifier of the site this downloader targets.
        """
        super().__init__(requester, parser, saver, config)
        self._site = site

    @property
    def site(self) -> str:
        """
        Site identifier this downloader works against.

        :return: The site string.
        """
        return self._site

    @site.setter
    def site(self, value: str) -> None:
        """
        Replace the site identifier.

        :param value: New site string to set.
        """
        self._site = value

    def download_one(self, book_id: str) -> None:
        """
        Fetch the metadata, cover and chapters of one book, then invoke the saver.

        Cached ``book_info.json`` is reused unless it is missing, unreadable,
        or the computed day count since ``update_time`` is greater than 1.
        A chapter whose request or parsing raises is logged and skipped.

        :param book_id: The identifier of the book to download.
        """
        tag = "[Downloader]"
        keep_html = self.config.save_html
        skip_existing = self.config.skip_existing
        site_name = self.site
        interval = self.config.request_interval

        book_root = self.raw_data_dir / site_name / book_id
        cache_root = self.cache_dir / site_name / book_id
        info_file = book_root / "book_info.json"
        chapters_root = book_root / "chapters"
        # Only meaningful (and only used) when raw HTML is being kept.
        html_root = (cache_root / "html") if keep_html else None

        for directory in (book_root, chapters_root):
            directory.mkdir(parents=True, exist_ok=True)

        book_info: Dict[str, Any]

        try:
            if not info_file.exists():
                raise FileNotFoundError
            book_info = json.loads(info_file.read_text(encoding="utf-8"))
            days, hours, minutes, seconds = calculate_time_difference(
                book_info.get("update_time", ""), "UTC+8"
            )
            logger.info(
                "%s Last updated %dd %dh %dm %ds ago",
                tag,
                days,
                hours,
                minutes,
                seconds,
            )
            if days > 1:
                # Same exception as "file missing": both mean "fetch again".
                raise FileNotFoundError
        except Exception:
            # Any problem with the cached metadata falls back to a fresh request.
            raw_info = self.requester.get_book_info(book_id, interval)
            if keep_html:
                save_as_txt(raw_info, html_root / "info.html")
            book_info = self.parser.parse_book_info(raw_info)
            name_missing = book_info.get("book_name", "") == "未找到书名"
            time_missing = book_info.get("update_time", "") == "未找到更新时间"
            # Placeholder results are still used below but never persisted.
            if not (name_missing or time_missing):
                save_as_json(book_info, info_file)

        # Cover image
        cover_url = book_info.get("cover_url", "")
        if cover_url and not download_image_as_bytes(cover_url, book_root):
            logger.warning("%s Failed to download cover: %s", tag, cover_url)

        # Chapters, volume by volume
        for volume in book_info.get("volumes", []):
            logger.info("%s Enqueuing volume: %s", tag, volume.get("volume_name", ""))

            for chapter in volume.get("chapters", []):
                chapter_id = chapter.get("chapterId")
                if not chapter_id:
                    logger.warning("%s Skipping chapter without chapterId", tag)
                    continue

                target = chapters_root / f"{chapter_id}.json"
                if target.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s", tag, chapter_id
                    )
                    continue

                title = chapter.get("title", "")
                logger.info("%s Fetching chapter: %s (%s)", tag, title, chapter_id)
                try:
                    page = self.requester.get_book_chapter(
                        book_id, chapter_id, interval
                    )

                    if keep_html:
                        html_file = html_root / f"{chapter_id}.html"
                        save_as_txt(page, html_file, on_exist="skip")
                        logger.debug(
                            "%s Saved raw HTML for chapter %s to %s",
                            tag,
                            chapter_id,
                            html_file,
                        )

                    parsed = self.parser.parse_chapter(page, chapter_id)
                    if not parsed:
                        logger.warning(
                            "%s Parsed chapter json is empty, skipping: %s (%s)",
                            tag,
                            title,
                            chapter_id,
                        )
                        continue
                except Exception as exc:
                    logger.warning(
                        "%s Error while processing chapter %s (%s): %s",
                        tag,
                        title,
                        chapter_id,
                        str(exc),
                    )
                    continue
                else:
                    # Writing the result is deliberately outside the guarded
                    # section, so a failure here is not swallowed.
                    save_as_json(parsed, target)
                    logger.info("%s Saved chapter: %s (%s)", tag, title, chapter_id)

        self.saver.save(book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            tag,
            book_info.get("book_name", "unknown"),
        )
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.downloaders.qidian_downloader
5
+ ---------------------------------------------------
6
+
7
+ This module defines `QidianDownloader`, a platform-specific downloader
8
+ implementation for retrieving novels from Qidian (起点中文网).
9
+ """
10
+
11
+ import json
12
+ import logging
13
+ from typing import Any, Dict
14
+
15
+ from novel_downloader.config import DownloaderConfig
16
+ from novel_downloader.core.interfaces import (
17
+ ParserProtocol,
18
+ RequesterProtocol,
19
+ SaverProtocol,
20
+ )
21
+ from novel_downloader.utils.file_utils import save_as_json, save_as_txt
22
+ from novel_downloader.utils.network import download_image_as_bytes
23
+ from novel_downloader.utils.state import state_mgr
24
+ from novel_downloader.utils.time_utils import calculate_time_difference
25
+
26
+ from .base_downloader import BaseDownloader
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class QidianDownloader(BaseDownloader):
    """
    Specialized downloader for Qidian novels.

    Login is attempted once when the instance is created. If it fails,
    :meth:`download_one` logs a warning and returns without downloading.
    """

    def __init__(
        self,
        requester: RequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Initialize the downloader and perform the login handshake.

        :param requester: Object implementing RequesterProtocol, used to fetch raw data.
        :param parser: Object implementing ParserProtocol, used to parse page content.
        :param saver: Object implementing SaverProtocol, used to save final output.
        :param config: Downloader configuration object.
        """
        super().__init__(requester, parser, saver, config)

        self._site_key = "qidian"
        self._is_logged_in = self._handle_login()
        # Persist the outcome: the manual-login flag is set when login failed
        # and cleared when it succeeded.
        state_mgr.set_manual_login_flag(self._site_key, not self._is_logged_in)

    def _handle_login(self) -> bool:
        """
        Perform login with automatic fallback to manual:

        1. If the stored manual-login flag is falsy, try automatic login
           and return True immediately on success.
        2. Otherwise (flag set, or automatic login failed) attempt manual login.

        :return: True if either login attempt succeeded, False otherwise.
        """
        manual_flag = state_mgr.get_manual_login_flag(self._site_key)

        # First try automatic login
        if not manual_flag and self._requester.login(manual_login=False):
            return True

        # Fall back to manual login
        return self._requester.login(manual_login=True)

    def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Book metadata is taken from the cached ``book_info.json`` unless it is
        missing, unreadable, or the computed day count since ``update_time``
        is greater than 1; in those cases it is requested and parsed again.
        Each chapter is saved as ``<chapterId>.json`` under ``chapters`` or,
        when the parser reports the page as encrypted, under
        ``encrypted_chapters``. An exception raised while fetching or parsing
        one chapter is logged and that chapter is skipped, so the remaining
        chapters are still processed and the saver still runs.

        :param book_id: The identifier of the book to download.
        """
        if not self._is_logged_in:
            logger.warning(
                "[%s] login failed, skipping download of %s",
                self._site_key,
                book_id,
            )
            return

        TAG = "[Downloader]"
        save_html = self.config.save_html
        skip_existing = self.config.skip_existing
        wait_time = self.config.request_interval
        site = self._site_key

        raw_base = self.raw_data_dir / site / book_id
        cache_base = self.cache_dir / site / book_id
        info_path = raw_base / "book_info.json"
        chapter_dir = raw_base / "chapters"
        encrypted_chapter_dir = raw_base / "encrypted_chapters"
        # Only used when save_html is enabled.
        chapters_html_dir = (cache_base / "html") if save_html else None

        raw_base.mkdir(parents=True, exist_ok=True)
        chapter_dir.mkdir(parents=True, exist_ok=True)
        encrypted_chapter_dir.mkdir(parents=True, exist_ok=True)

        book_info: Dict[str, Any]

        try:
            if not info_path.exists():
                raise FileNotFoundError
            book_info = json.loads(info_path.read_text(encoding="utf-8"))
            days, hrs, mins, secs = calculate_time_difference(
                book_info.get("update_time", ""), "UTC+8"
            )
            logger.info(
                "%s Last updated %dd %dh %dm %ds ago", TAG, days, hrs, mins, secs
            )
            if days > 1:
                raise FileNotFoundError  # trigger re-fetch
        except Exception:
            # Any problem with the cached metadata falls back to a fresh request.
            info_html = self.requester.get_book_info(book_id, wait_time)
            if save_html:
                info_html_path = chapters_html_dir / "info.html"
                save_as_txt(info_html, info_html_path)
            book_info = self.parser.parse_book_info(info_html)
            # Do not persist metadata that only contains parser placeholders.
            if (
                book_info.get("book_name", "") != "未找到书名"
                and book_info.get("update_time", "") != "未找到更新时间"
            ):
                save_as_json(book_info, info_path)

        # download cover
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            cover_bytes = download_image_as_bytes(cover_url, raw_base)
            if not cover_bytes:
                logger.warning("%s Failed to download cover: %s", TAG, cover_url)

        # enqueue chapters
        for vol in book_info.get("volumes", []):
            vol_name = vol.get("volume_name", "")
            logger.info("%s Enqueuing volume: %s", TAG, vol_name)

            for chap in vol.get("chapters", []):
                cid = chap.get("chapterId")
                if not cid:
                    logger.warning("%s Skipping chapter without chapterId", TAG)
                    continue

                # Cheap pre-check against the plain chapter folder before
                # issuing any request.
                chap_path = chapter_dir / f"{cid}.json"
                if chap_path.exists() and skip_existing:
                    logger.debug(
                        "%s Chapter already exists, skipping: %s",
                        TAG,
                        cid,
                    )
                    continue

                chap_title = chap.get("title", "")
                logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
                try:
                    chap_html = self.requester.get_book_chapter(
                        book_id, cid, wait_time
                    )

                    is_encrypted = self.parser.is_encrypted(chap_html)  # type: ignore[attr-defined]

                    # The destination folder is only known once the page has
                    # been inspected, so the existence check is repeated.
                    folder = encrypted_chapter_dir if is_encrypted else chapter_dir
                    chap_path = folder / f"{cid}.json"

                    if chap_path.exists() and skip_existing:
                        logger.debug(
                            "%s Chapter already exists, skipping: %s",
                            TAG,
                            cid,
                        )
                        continue

                    # Pages matching the VIP markers are not cached as HTML.
                    if save_html and not is_vip(chap_html):
                        html_folder = chapters_html_dir / (
                            "html_encrypted" if is_encrypted else "html_plain"
                        )
                        html_path = html_folder / f"{cid}.html"
                        save_as_txt(chap_html, html_path, on_exist="skip")
                        logger.debug(
                            "%s Saved raw HTML for chapter %s to %s",
                            TAG,
                            cid,
                            html_path,
                        )

                    chap_json = self.parser.parse_chapter(chap_html, cid)
                    if not chap_json:
                        logger.warning(
                            "%s Parsed chapter json is empty, skipping: %s (%s)",
                            TAG,
                            chap_title,
                            cid,
                        )
                        continue
                except Exception as e:
                    # One bad chapter must not abort the rest of the book.
                    logger.warning(
                        "%s Error while processing chapter %s (%s): %s",
                        TAG,
                        chap_title,
                        cid,
                        str(e),
                    )
                    continue

                save_as_json(chap_json, chap_path)
                logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)

        self.saver.save(book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return
199
+
200
+
201
def is_vip(html_str: str) -> bool:
    """
    Tell whether a page contains one of the known VIP-only marker phrases.

    :param html_str: Raw HTML string.
    :return: True if any marker phrase occurs in ``html_str``, else False.
    """
    for phrase in ("这是VIP章节", "需要订阅", "订阅后才能阅读"):
        if phrase in html_str:
            return True
    return False
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory
5
+ -----------------------------
6
+
7
+ This package provides factory methods for dynamically retrieving components
8
+ based on runtime parameters such as site name or content type.
9
+ """
10
+
11
+ from .downloader_factory import (
12
+ get_async_downloader,
13
+ get_downloader,
14
+ get_sync_downloader,
15
+ )
16
+ from .parser_factory import get_parser
17
+ from .requester_factory import (
18
+ get_async_requester,
19
+ get_requester,
20
+ get_sync_requester,
21
+ )
22
+ from .saver_factory import get_saver
23
+
24
+ __all__ = [
25
+ "get_async_downloader",
26
+ "get_downloader",
27
+ "get_sync_downloader",
28
+ "get_parser",
29
+ "get_async_requester",
30
+ "get_requester",
31
+ "get_sync_requester",
32
+ "get_saver",
33
+ ]
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory.downloader_factory
5
+ ------------------------------------------------
6
+
7
+ This module implements a factory function for creating downloader instances
8
+ based on the site name and parser mode specified in the configuration.
9
+
10
+ - get_async_downloader -> always returns a CommonAsyncDownloader
11
+ - get_sync_downloader -> returns a site-specific downloader or CommonDownloader
12
+ - get_downloader -> dispatches to one of the above based on config.mode
13
+
14
+ To add support for new sites or modes, extend the `_site_map` accordingly.
15
+ """
16
+
17
+ from typing import Union
18
+
19
+ from novel_downloader.config import DownloaderConfig, load_site_rules
20
+ from novel_downloader.core.downloaders import (
21
+ CommonAsyncDownloader,
22
+ CommonDownloader,
23
+ QidianDownloader,
24
+ )
25
+ from novel_downloader.core.interfaces import (
26
+ AsyncDownloaderProtocol,
27
+ AsyncRequesterProtocol,
28
+ DownloaderProtocol,
29
+ ParserProtocol,
30
+ RequesterProtocol,
31
+ SaverProtocol,
32
+ )
33
+
34
+ _site_map = {
35
+ "qidian": QidianDownloader,
36
+ # "biquge": ...
37
+ }
38
+
39
+
40
def get_async_downloader(
    requester: AsyncRequesterProtocol,
    parser: ParserProtocol,
    saver: SaverProtocol,
    site: str,
    config: DownloaderConfig,
) -> AsyncDownloaderProtocol:
    """
    Build the asynchronous downloader for ``site``.

    The result is always a ``CommonAsyncDownloader``; the site name is
    lower-cased before it is looked up and passed on.

    :param requester: Requester implementation
    :param parser: Parser implementation
    :param saver: Saver implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration

    :return: An instance of a downloader class

    :raises TypeError: If ``requester`` is not an AsyncRequesterProtocol.
    :raises ValueError: If the loaded site rules have no entry for ``site``.
    """
    normalized_site = site.lower()

    if not isinstance(requester, AsyncRequesterProtocol):
        raise TypeError("Async mode requires an AsyncRequesterProtocol")

    # The rule itself is not needed here; its presence marks the site as known.
    if load_site_rules().get(normalized_site) is None:
        raise ValueError(f"Unsupported site: {site}")

    return CommonAsyncDownloader(requester, parser, saver, config, normalized_site)
73
+
74
+
75
def get_sync_downloader(
    requester: RequesterProtocol,
    parser: ParserProtocol,
    saver: SaverProtocol,
    site: str,
    config: DownloaderConfig,
) -> DownloaderProtocol:
    """
    Build the synchronous downloader for ``site``.

    A class registered in ``_site_map`` (e.g. QidianDownloader) takes
    precedence; any other site known to the site rules gets a
    ``CommonDownloader``.

    :param requester: Requester implementation
    :param parser: Parser implementation
    :param saver: Saver implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration

    :return: An instance of a downloader class

    :raises TypeError: If ``requester`` is not a RequesterProtocol.
    :raises ValueError: If the site is neither in ``_site_map`` nor in the
                        loaded site rules.
    """
    normalized_site = site.lower()

    if not isinstance(requester, RequesterProtocol):
        raise TypeError("Sync mode requires a RequesterProtocol")

    # Dedicated implementation, if one is registered for this site
    dedicated_cls = _site_map.get(normalized_site)
    if dedicated_cls is not None:
        return dedicated_cls(requester, parser, saver, config)

    # Generic implementation, only for sites that have a rule entry
    if load_site_rules().get(normalized_site) is None:
        raise ValueError(f"Unsupported site: {site}")

    return CommonDownloader(requester, parser, saver, config, normalized_site)
115
+
116
+
117
def get_downloader(
    requester: Union[AsyncRequesterProtocol, RequesterProtocol],
    parser: ParserProtocol,
    saver: SaverProtocol,
    site: str,
    config: DownloaderConfig,
) -> Union[AsyncDownloaderProtocol, DownloaderProtocol]:
    """
    Choose between the async and sync factories based on ``config.mode``.

    The mode is compared case-insensitively: 'async' selects
    get_async_downloader, 'browser' and 'session' select get_sync_downloader.

    :param requester: Requester implementation
    :param parser: Parser implementation
    :param saver: Saver implementation
    :param site: Site name (e.g., 'qidian')
    :param config: Downloader configuration

    :return: An instance of a downloader class

    :raises ValueError: If the mode is not one of the three above, or if the
                        selected factory rejects the site.
    :raises TypeError: If the provided requester does not match the required protocol
                       for the chosen mode (sync vs async).
    """
    selected_mode = config.mode.lower()
    wants_async = selected_mode == "async"

    if not wants_async and selected_mode not in ("browser", "session"):
        raise ValueError(f"Unknown mode '{config.mode}' for site '{site}'")

    if wants_async:
        if not isinstance(requester, AsyncRequesterProtocol):
            raise TypeError("Async mode requires an AsyncRequesterProtocol")
        return get_async_downloader(requester, parser, saver, site, config)

    if not isinstance(requester, RequesterProtocol):
        raise TypeError("Sync mode requires a RequesterProtocol")
    return get_sync_downloader(requester, parser, saver, site, config)
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ novel_downloader.core.factory.parser_factory
5
+ --------------------------------------------
6
+
7
+ This module implements a factory function for creating parser instances
8
+ based on the site name and parser mode specified in the configuration.
9
+
10
+ Currently supported:
11
+ - Site: 'qidian'
12
+ - Modes:
13
+ - 'browser': QidianBrowserParser
14
+ - 'session': QidianSessionParser
15
+
16
+ To add support for new sites or modes, extend the `_site_map` accordingly.
17
+ """
18
+
19
+ from novel_downloader.config import ParserConfig, load_site_rules
20
+ from novel_downloader.core.interfaces import ParserProtocol
21
+ from novel_downloader.core.parsers import (
22
+ CommonParser,
23
+ QidianBrowserParser,
24
+ QidianSessionParser,
25
+ )
26
+
27
+ _site_map = {
28
+ "qidian": {
29
+ "browser": QidianBrowserParser,
30
+ "session": QidianSessionParser,
31
+ },
32
+ # "biquge": ...
33
+ }
34
+
35
+
36
def get_parser(site: str, config: ParserConfig) -> ParserProtocol:
    """
    Create the parser instance for ``site``.

    Sites registered in ``_site_map`` use their own parser class, selected by
    ``config.mode`` when the registration is a mode-to-class mapping. Every
    other site falls back to ``CommonParser`` driven by its site rule.

    :param site: Site name (e.g., 'qidian')
    :param config: Configuration for the parser
    :return: An instance of a parser class
    :raises ValueError: If the site is unknown, or the site is registered
                        but has no parser for ``config.mode``.
    """
    normalized_site = site.lower()
    registration = _site_map.get(normalized_site)

    if registration is None:
        # Not specially mapped: rely on the rule-driven generic parser.
        rule = load_site_rules().get(normalized_site)
        if rule is None:
            raise ValueError(f"Unsupported site: {site}")
        return CommonParser(config, normalized_site, rule)

    if not isinstance(registration, dict):
        # A bare class registration applies to every mode.
        return registration(config)

    parser_cls = registration.get(config.mode)
    if parser_cls is None:
        raise ValueError(f"Unsupported mode '{config.mode}' for site '{site}'")
    return parser_cls(config)