novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.qianbi
4
+ -----------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+ import re
10
+
11
+ from lxml import html
12
+
13
+ from novel_downloader.core.searchers.base import BaseSearcher
14
+ from novel_downloader.core.searchers.registry import register_searcher
15
+ from novel_downloader.models import SearchResult
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@register_searcher(
    site_keys=["qianbi"],
)
class QianbiSearcher(BaseSearcher):
    """
    Searcher for the Qianbi site (``www.23qb.com``).

    The search endpoint may answer with a multi-item result list or with a
    single book detail page; the latter is recognised by the presence of an
    ``og:url`` meta tag in the returned HTML.
    """

    site_name = "qianbi"
    priority = 3
    SEARCH_URL = "https://www.23qb.com/search.html"

    @classmethod
    def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from Qianbi's search page.

        :param keyword: The search term to query on Qianbi.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        params = {"searchkey": keyword}
        try:
            response = cls._http_get(cls.SEARCH_URL, params=params)
            return response.text
        except Exception:
            # Best effort: a failing site must not abort a multi-site search.
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
                exc_info=True,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from Qianbi search results into list of SearchResult.

        :param html_str: Raw HTML string from Qianbi search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts (empty for blank input or limit <= 0).
        """
        # ``_fetch_html`` returns "" on failure and lxml refuses to parse an
        # empty document, so blank input is answered without parsing.
        if not html_str or not html_str.strip():
            return []
        # Apply ``limit`` uniformly; the detail-page branch yields one item
        # and would otherwise ignore a limit of zero.
        if limit is not None and limit <= 0:
            return []
        if '<meta property="og:url"' in html_str:
            return cls._parse_detail_html(html_str)
        return cls._parse_search_list_html(html_str, limit)

    @classmethod
    def _parse_detail_html(cls, html_str: str) -> list[SearchResult]:
        """
        Parse a single-book detail page, detected via <meta property="og:url">.

        :param html_str: Raw HTML of the book detail page.
        :return: A single-element list with the book's SearchResult, or an
                 empty list when no book id can be extracted.
        """
        if not html_str or not html_str.strip():
            return []

        doc = html.fromstring(html_str)
        urls = doc.xpath('//meta[@property="og:url"]/@content')
        if not urls:
            return []

        # The numeric id follows "/book/"; a trailing slash is not required.
        match = re.search(r"/book/(\d+)", urls[0])
        if match is None:
            # A result without a book id cannot be used to fetch the book.
            return []
        book_id = match.group(1)

        # Title comes from <h1 class="page-title">.
        titles = doc.xpath('//h1[@class="page-title"]/text()')
        title = titles[0].strip() if titles else ""
        # Author comes from the title attribute of the first "/author/" link.
        authors = doc.xpath('//a[contains(@href,"/author/")]/@title')
        author = authors[0].strip() if authors else ""

        return [
            SearchResult(
                site=cls.site_name,
                book_id=book_id,
                title=title,
                author=author,
                priority=cls.priority,
            )
        ]

    @classmethod
    def _parse_search_list_html(
        cls, html_str: str, limit: int | None
    ) -> list[SearchResult]:
        """
        Parse a multi-item search result page.

        Items without a title link or without a book id are skipped instead
        of raising, so one malformed entry does not discard the whole page.

        :param html_str: Raw HTML of the search-results page.
        :param limit: Maximum number of items to return, or None for all.
        :return: List of SearchResult.
        """
        if not html_str or not html_str.strip():
            return []

        doc = html.fromstring(html_str)
        results: list[SearchResult] = []

        for item in doc.xpath('//div[contains(@class,"module-search-item")]'):
            if limit is not None and len(results) >= limit:
                break

            links = item.xpath('.//div[@class="novel-info-header"]/h3/a')
            if not links:
                continue
            link = links[0]

            # href has the form "/book/<id>/"; keep only the id part.
            href = link.get("href", "").strip("/")
            book_id = href.replace("book/", "").strip("/")
            if not book_id:
                continue

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    title=link.text_content().strip(),
                    # Author is not present on the result-list page.
                    author="",
                    # Later hits rank lower (larger priority value).
                    priority=cls.priority + len(results),
                )
            )
        return results
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.qidian
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.models import SearchResult
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ # @register_searcher(
19
+ # site_keys=["qidian", "qd"],
20
+ # )
21
class QidianSearcher(BaseSearcher):
    """
    Searcher for Qidian (``www.qidian.com``).

    TODO: without a cookie the site currently redirects by default, which is
    why the ``register_searcher`` decorator above is commented out.
    """

    site_name = "qidian"
    priority = 0
    SEARCH_URL = "https://www.qidian.com/so/{query}.html"

    @classmethod
    def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from Qidian's search page.

        :param keyword: The search term to query on Qidian.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            response = cls._http_get(url)
            return response.text
        except Exception:
            # Best effort: report the failure and let the caller see "no data".
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
                exc_info=True,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from Qidian search results into list of SearchResult.

        Entries lacking a ``data-bid`` attribute or a title link are skipped
        instead of raising.

        :param html_str: Raw HTML string from Qidian search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts (empty for blank input).
        """
        # ``_fetch_html`` returns "" on failure and lxml refuses to parse an
        # empty document, so blank input is answered without parsing.
        if not html_str or not html_str.strip():
            return []

        doc = html.fromstring(html_str)
        items = doc.xpath(
            '//div[@id="result-list"]//li[contains(@class, "res-book-item")]'
        )
        results: list[SearchResult] = []

        base_prio = getattr(cls, "priority", 0)
        for item in items:
            if limit is not None and len(results) >= limit:
                break

            # ``SearchResult.book_id`` is a str; a missing attribute gives None.
            book_id = (item.get("data-bid") or "").strip()
            title_nodes = item.xpath('.//h3[@class="book-info-title"]/a')
            if not book_id or not title_nodes:
                continue

            author_nodes = item.xpath(
                './/p[@class="author"]/a[@class="name"] | .//p[@class="author"]/i'
            )
            author = author_nodes[0].text_content().strip() if author_nodes else ""

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    title=title_nodes[0].text_content().strip(),
                    author=author,
                    # Later hits rank lower (larger priority value).
                    priority=base_prio + len(results),
                )
            )
        return results
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.registry
4
+ ----------------------------------------
5
+
6
+ """
7
+
8
+ __all__ = ["register_searcher", "search"]
9
+
10
+ from collections.abc import Callable, Sequence
11
+ from typing import TypeVar
12
+
13
+ from novel_downloader.core.searchers.base import BaseSearcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ S = TypeVar("S", bound=BaseSearcher)
17
+
18
+ _SEARCHER_REGISTRY: dict[str, type[BaseSearcher]] = {}
19
+
20
+
21
def register_searcher(
    site_keys: Sequence[str],
) -> Callable[[type[S]], type[S]]:
    """
    Build a class decorator that records a searcher in the registry.

    :param site_keys: Keys under which the decorated class is stored; an
                      existing entry for the same key is overwritten.
    :return: A decorator that registers the class and returns it unchanged.
    """

    def _bind(searcher_cls: type[S]) -> type[S]:
        # Every key maps to the same class object.
        _SEARCHER_REGISTRY.update(dict.fromkeys(site_keys, searcher_cls))
        return searcher_cls

    return _bind
34
+
35
+
36
def search(
    keyword: str,
    sites: Sequence[str] | None = None,
    limit: int | None = None,
    per_site_limit: int = 5,
) -> list[SearchResult]:
    """
    Perform a search for the given keyword across one or more registered sites,
    then aggregate and sort the results by their `priority` value.

    Sites are queried in the order given (or in registration order when
    ``sites`` is omitted), so results with equal priority come back in a
    reproducible order. A failure in one site is logged and does not prevent
    the remaining sites from being searched.

    :param keyword: The search term or keyword to query.
    :param sites: An optional sequence of site keys to limit which searchers.
    :param limit: Maximum total number of results to return; if None, return all.
                  Values below zero are treated as zero.
    :param per_site_limit: Maximum number of search results per site.
    :return: A flat list of `SearchResult` objects, lowest `priority` first.
    """
    import logging

    log = logging.getLogger(__name__)

    keys = list(sites or _SEARCHER_REGISTRY)
    unknown = [key for key in keys if key not in _SEARCHER_REGISTRY]
    if unknown:
        log.warning("Ignoring unregistered search site keys: %s", unknown)

    # Several keys may alias one class; dict.fromkeys removes duplicates while
    # keeping first-seen order (a set would make iteration order arbitrary).
    to_call = list(
        dict.fromkeys(
            _SEARCHER_REGISTRY[key] for key in keys if key in _SEARCHER_REGISTRY
        )
    )

    results: list[SearchResult] = []
    for cls in to_call:
        try:
            results.extend(cls.search(keyword, limit=per_site_limit))
        except Exception:
            # Best effort across sites, but leave a trace of what went wrong.
            log.warning(
                "Search failed for searcher '%s'", cls.__name__, exc_info=True
            )

    # list.sort is stable, so ties keep the site order established above.
    results.sort(key=lambda res: res["priority"])
    if limit is None:
        return results
    # Clamp so a negative limit cannot slice from the end of the list.
    return results[: max(limit, 0)]
@@ -8,7 +8,6 @@
8
8
  "settings_help": "Configure downloader settings.",
9
9
  "settings_set_lang_help": "Switch language between Chinese and English.",
10
10
  "settings_set_config_help": "Set and save a custom YAML configuration file.",
11
- "settings_update_rules_help": "Update site rules from a TOML/YAML/JSON file.",
12
11
  "settings_init_help": "Initialize default config and rule files in the current directory.",
13
12
  "settings_init_force_help": "Force overwrite if file already exists.",
14
13
  "settings_init_exists": "File already exists: {filename}",
@@ -20,8 +19,6 @@
20
19
  "settings_set_lang": "Language switched to {lang}",
21
20
  "settings_set_config": "Configuration file saved from {path}",
22
21
  "settings_set_config_fail": "Failed to save config file: {err}",
23
- "settings_update_rules": "Site rules updated from {path}",
24
- "settings_update_rules_fail": "Failed to update site rules: {err}",
25
22
  "settings_set_cookies_help": "Set cookies for a specific site.",
26
23
  "settings_set_cookies_prompt_site": "Site identifier (e.g. 'qidian')",
27
24
  "settings_set_cookies_prompt_payload": "Cookie payload (JSON or 'k=v; k2=v2')",
@@ -61,10 +58,12 @@
61
58
  "download_site_mode": "Mode: {mode}",
62
59
  "download_no_ids": "No book IDs provided. Exiting.",
63
60
  "download_fail_get_ids": "Failed to get book IDs from config: {err}",
61
+ "download_config_load_fail": "Failed to load config: {err}",
64
62
  "download_only_example": "Only example book IDs found (e.g. '{example}').",
65
63
  "download_edit_config": "Please edit your config and replace them with real book IDs.",
66
64
  "download_downloading": "Downloading book {book_id} from {site}...",
67
65
  "download_prompt_parse": "Parse...",
66
+ "download_login_failed": "Download login failed: please check your cookies or account credentials and try again.",
68
67
  "download_book_ids": "One or more book IDs to process",
69
68
  "download_option_start": "Start chapter ID (applies to the first book ID only)",
70
69
  "download_option_end": "End chapter ID (applies to the first book ID only)",
@@ -114,5 +113,14 @@
114
113
  "export_success_txt": "Successfully exported {book_id} as TXT.",
115
114
  "export_failed_txt": "Failed to export {book_id} as TXT: {err}",
116
115
  "export_success_epub": "Successfully exported {book_id} as EPUB.",
117
- "export_failed_epub": "Failed to export {book_id} as EPUB: {err}"
116
+ "export_failed_epub": "Failed to export {book_id} as EPUB: {err}",
117
+
118
+ "help_search": "search for a book on one or more sites",
119
+ "help_search_sites": "which site keys to search (default: all)",
120
+ "help_search_keyword": "keyword to look for",
121
+ "help_search_limit": "Maximum number of search results",
122
+ "help_search_site_limit": "Maximum number of search results per site",
123
+ "no_results": "No results found.",
124
+ "prompt_select_index": "Select a result by number (or press Enter to cancel): ",
125
+ "invalid_selection": "Invalid choice, please try again."
118
126
  }
@@ -8,7 +8,6 @@
8
8
  "settings_help": "配置下载器设置",
9
9
  "settings_set_lang_help": "在中文和英文之间切换语言",
10
10
  "settings_set_config_help": "设置并保存自定义 YAML 配置文件",
11
- "settings_update_rules_help": "从 TOML/YAML/JSON 文件更新站点规则",
12
11
  "settings_init_help": "在当前目录初始化默认配置和规则文件",
13
12
  "settings_init_force_help": "如果文件已存在则强制覆盖",
14
13
  "settings_init_exists": "文件已存在: {filename}",
@@ -20,8 +19,6 @@
20
19
  "settings_set_lang": "语言已切换为 {lang}",
21
20
  "settings_set_config": "已从 {path} 保存配置文件",
22
21
  "settings_set_config_fail": "保存配置文件失败: {err}",
23
- "settings_update_rules": "已从 {path} 更新站点规则",
24
- "settings_update_rules_fail": "更新站点规则失败: {err}",
25
22
  "settings_set_cookies_help": "为特定站点设置 Cookie",
26
23
  "settings_set_cookies_prompt_site": "站点标识 (例如 'qidian')",
27
24
  "settings_set_cookies_prompt_payload": "Cookie 内容 (JSON 或 'k=v; k2=v2')",
@@ -61,9 +58,11 @@
61
58
  "download_site_mode": "使用模式: {mode}",
62
59
  "download_no_ids": "未提供书籍 ID, 正在退出",
63
60
  "download_fail_get_ids": "从配置获取书籍 ID 失败: {err}",
61
+ "download_config_load_fail": "加载配置失败: {err}",
64
62
  "download_only_example": "只发现示例书籍 ID (例如 '{example}')",
65
63
  "download_edit_config": "请编辑配置并将示例 ID 替换为真实书籍 ID",
66
64
  "download_downloading": "正在从 {site} 下载书籍 {book_id}...",
65
+ "download_login_failed": "登录失败: 请检查您的 Cookie 或账户信息后重试",
67
66
  "download_prompt_parse": "结束...",
68
67
  "download_book_ids": "要处理的一个或多个小说 ID",
69
68
  "download_option_start": "起始章节 ID (仅用于第一个书籍 ID)",
@@ -114,5 +113,14 @@
114
113
  "export_success_txt": "成功将 {book_id} 导出为 TXT。",
115
114
  "export_failed_txt": "导出 {book_id} 为 TXT 失败: {err}",
116
115
  "export_success_epub": "成功将 {book_id} 导出为 EPUB",
117
- "export_failed_epub": "导出 {book_id} 为 EPUB 失败: {err}"
116
+ "export_failed_epub": "导出 {book_id} 为 EPUB 失败: {err}",
117
+
118
+ "help_search": "在一个或多个站点搜索书籍",
119
+ "help_search_sites": "要搜索的站点键 (默认为全部)",
120
+ "help_search_keyword": "要搜索的关键字",
121
+ "help_search_limit": "总体搜索结果数量上限",
122
+ "help_search_site_limit": "单站点搜索结果数量上限",
123
+ "no_results": "未找到结果",
124
+ "prompt_select_index": "通过编号选择结果 (或按回车取消): ",
125
+ "invalid_selection": "无效选择, 请重试。"
118
126
  }
@@ -5,7 +5,6 @@ novel_downloader.models
5
5
 
6
6
  """
7
7
 
8
- from .browser import NewContextOptions
9
8
  from .chapter import ChapterDict
10
9
  from .config import (
11
10
  BookConfig,
@@ -13,54 +12,29 @@ from .config import (
13
12
  ExporterConfig,
14
13
  FetcherConfig,
15
14
  ParserConfig,
15
+ TextCleanerConfig,
16
16
  )
17
17
  from .login import LoginField
18
- from .site_rules import (
19
- BookInfoRules,
20
- FieldRules,
21
- RuleStep,
22
- SiteProfile,
23
- SiteRules,
24
- SiteRulesDict,
25
- VolumesRules,
26
- )
27
- from .tasks import (
28
- CidTask,
29
- HtmlTask,
30
- RestoreTask,
31
- )
18
+ from .search import SearchResult
32
19
  from .types import (
33
20
  BrowserType,
34
21
  LogLevel,
35
22
  ModeType,
36
- SaveMode,
37
23
  SplitMode,
38
- StorageBackend,
39
24
  )
40
25
 
41
26
  __all__ = [
42
- "NewContextOptions",
43
27
  "BookConfig",
44
28
  "DownloaderConfig",
45
29
  "ParserConfig",
46
30
  "FetcherConfig",
47
31
  "ExporterConfig",
32
+ "TextCleanerConfig",
48
33
  "ChapterDict",
49
34
  "LoginField",
35
+ "SearchResult",
50
36
  "BrowserType",
51
37
  "ModeType",
52
- "SaveMode",
53
- "StorageBackend",
54
38
  "SplitMode",
55
39
  "LogLevel",
56
- "BookInfoRules",
57
- "FieldRules",
58
- "RuleStep",
59
- "SiteProfile",
60
- "SiteRules",
61
- "SiteRulesDict",
62
- "VolumesRules",
63
- "CidTask",
64
- "HtmlTask",
65
- "RestoreTask",
66
40
  ]
@@ -16,14 +16,13 @@ These models are used to map loaded YAML or JSON config data into
16
16
  strongly typed Python objects for safer and cleaner access.
17
17
  """
18
18
 
19
- from dataclasses import dataclass
19
+ from dataclasses import dataclass, field
20
20
  from typing import NotRequired, TypedDict
21
21
 
22
22
  from .types import (
23
23
  BrowserType,
24
24
  ModeType,
25
25
  SplitMode,
26
- StorageBackend,
27
26
  )
28
27
 
29
28
 
@@ -52,13 +51,11 @@ class DownloaderConfig:
52
51
  backoff_factor: float = 2.0
53
52
  raw_data_dir: str = "./raw_data"
54
53
  cache_dir: str = "./novel_cache"
55
- download_workers: int = 4
56
- parser_workers: int = 4
54
+ workers: int = 4
57
55
  skip_existing: bool = True
58
56
  login_required: bool = False
59
57
  save_html: bool = False
60
58
  mode: ModeType = "session"
61
- storage_backend: StorageBackend = "json"
62
59
  storage_batch_size: int = 1
63
60
  username: str = ""
64
61
  password: str = ""
@@ -83,12 +80,20 @@ class ParserConfig:
83
80
  mode: ModeType = "session"
84
81
 
85
82
 
83
+ @dataclass
84
+ class TextCleanerConfig:
85
+ remove_invisible: bool = True
86
+ title_remove_patterns: list[str] = field(default_factory=list)
87
+ title_replacements: dict[str, str] = field(default_factory=dict)
88
+ content_remove_patterns: list[str] = field(default_factory=list)
89
+ content_replacements: dict[str, str] = field(default_factory=dict)
90
+
91
+
86
92
  @dataclass
87
93
  class ExporterConfig:
88
94
  cache_dir: str = "./novel_cache"
89
95
  raw_data_dir: str = "./raw_data"
90
96
  output_dir: str = "./downloads"
91
- storage_backend: StorageBackend = "json"
92
97
  clean_text: bool = True
93
98
  make_txt: bool = True
94
99
  make_epub: bool = False
@@ -100,6 +105,7 @@ class ExporterConfig:
100
105
  include_toc: bool = False
101
106
  include_picture: bool = False
102
107
  split_mode: SplitMode = "book"
108
+ cleaner_cfg: TextCleanerConfig = field(default_factory=TextCleanerConfig)
103
109
 
104
110
 
105
111
  class BookConfig(TypedDict):
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.models.search
4
+ ------------------------------
5
+
6
+ """
7
+
8
+ from typing import TypedDict
9
+
10
+
11
class SearchResult(TypedDict):
    """One search hit; every key is required (TypedDict totality default)."""

    # Key of the site the hit came from.
    site: str
    # Identifier of the book on that site.
    book_id: str
    # Book title as shown by the site.
    title: str
    # Author name; may be an empty string when the page does not show one.
    author: str
    # Sort key for aggregated results; smaller values are listed first.
    priority: int
@@ -8,8 +8,6 @@ novel_downloader.models.types
8
8
  from typing import Literal
9
9
 
10
10
  ModeType = Literal["browser", "session"]
11
- SaveMode = Literal["overwrite", "skip"]
12
- StorageBackend = Literal["json", "sqlite"]
13
11
  SplitMode = Literal["book", "volume"]
14
12
  LogLevel = Literal["DEBUG", "INFO", "WARNING", "ERROR"]
15
13
  BrowserType = Literal["chromium", "firefox", "webkit"]
@@ -21,11 +21,9 @@ request_interval = 2.0 # 同一本书各章节请求间隔 (秒)
21
21
  raw_data_dir = "./raw_data" # 原始章节 JSON/DB 存放目录
22
22
  output_dir = "./downloads" # 最终输出文件存放目录
23
23
  cache_dir = "./novel_cache" # 本地缓存目录 (字体 / 图片等)
24
- download_workers = 2 # 并发下载线程数
25
- parser_workers = 2 # 并发解析线程数
24
+ workers = 2 # 工作协程数
26
25
  skip_existing = true # 是否跳过已存在章节
27
- storage_backend = "sqlite" # 章节储存方法: json / sqlite
28
- storage_batch_size = 30 # SQLite 批量提交的章节数量
26
+ storage_batch_size = 1 # SQLite 批量提交的章节数量
29
27
 
30
28
  [general.debug]
31
29
  save_html = false # 是否将抓取到的原始 HTML 保留到磁盘
@@ -129,3 +127,32 @@ filename_template = "{title}_{author}" # 文件命名规则
129
127
  include_cover = true # 是否在 EPUB 中包含封面
130
128
  include_toc = false # 是否自动生成目录
131
129
  include_picture = false # 是否下载章节图片 (体积较大)
130
+
131
+ [cleaner]
132
+ remove_invisible = true
133
+
134
+ [cleaner.title]
135
+ remove_patterns = [
136
+ '【[^】]*?】',
137
+ '[（(][^()（）]*?求票[^()（）]*?[)）]',
138
+ ]
139
+
140
+ [cleaner.title.replace]
141
+ '：' = ':'
142
+
143
+ [cleaner.title.external]
144
+ enabled = false
145
+ remove_patterns = "path/to/title-remove.json"
146
+ replace = "path/to/title-replace.json"
147
+
148
+ [cleaner.content]
149
+ remove_patterns = []
150
+
151
+ [cleaner.content.replace]
152
+ 'li子' = '例子'
153
+ 'pinbi词' = '屏蔽词'
154
+
155
+ [cleaner.content.external]
156
+ enabled = false
157
+ remove_patterns = "path/to/content-remove.json"
158
+ replace = "path/to/content-replace.json"
@@ -0,0 +1,83 @@
1
+ body {
2
+ font-family: serif;
3
+ line-height: 1.5;
4
+ height: 95%;
5
+ margin: 2em;
6
+ }
7
+
8
+ h1 {
9
+ font-size: 2em;
10
+ text-align: center;
11
+ margin-bottom: 1em;
12
+ }
13
+
14
+ p.new-page-after {
15
+ page-break-after: always;
16
+ margin: 0;
17
+ padding: 0;
18
+ }
19
+
20
+ .intro-info {
21
+ list-style: none;
22
+ padding: 0;
23
+ margin: 0 0 2em;
24
+ }
25
+ .intro-info li {
26
+ margin: 0.5em 0;
27
+ font-size: 1.1em;
28
+ }
29
+
30
+ .intro-summary {
31
+ margin-top: 1em;
32
+ font-size: 1em;
33
+ }
34
+
35
+ .vol-header {
36
+ display: flex;
37
+ flex-direction: column;
38
+ justify-content: center;
39
+ align-items: center;
40
+ margin: 0;
41
+ }
42
+
43
+ .vol-border {
44
+ width: 100%;
45
+ text-align: center;
46
+ margin: 0 auto 0 auto;
47
+ text-indent: 0em;
48
+ }
49
+ .vol-border.flip {
50
+ transform: rotate(180deg);
51
+ -ms-transform: rotate(180deg);
52
+ -moz-transform: rotate(180deg);
53
+ -webkit-transform: rotate(180deg);
54
+ -o-transform: rotate(180deg);
55
+ }
56
+
57
/* Shared styling for the main and secondary volume title lines. */
.vol-title-main,
.vol-title-sub {
  font-weight: bold;
  text-align: center;
  text-indent: 0em;
  color: #6e471c;
  margin: 0.25em 0;
  display: block;
  text-shadow: 1px 1px 2px rgba(255,255,255,0.8);
}
68
+ .vol-title-main {
69
+ font-size: 1.25rem;
70
+ }
71
+ .vol-title-sub {
72
+ font-size: 1rem;
73
+ }
74
+
75
+ .volume-intro-text {
76
+ margin: 1em 0;
77
+ padding: 0 1em;
78
+ text-align: justify;
79
+ }
80
+ .volume-intro-text p {
81
+ margin: 0.5em 0;
82
+ line-height: 1.6;
83
+ }