novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -2
- novel_downloader/cli/config.py +1 -83
- novel_downloader/cli/download.py +4 -5
- novel_downloader/cli/export.py +4 -1
- novel_downloader/cli/main.py +2 -0
- novel_downloader/cli/search.py +123 -0
- novel_downloader/config/__init__.py +3 -10
- novel_downloader/config/adapter.py +190 -54
- novel_downloader/config/loader.py +2 -3
- novel_downloader/core/__init__.py +13 -13
- novel_downloader/core/downloaders/__init__.py +10 -11
- novel_downloader/core/downloaders/base.py +152 -26
- novel_downloader/core/downloaders/biquge.py +5 -1
- novel_downloader/core/downloaders/common.py +157 -378
- novel_downloader/core/downloaders/esjzone.py +5 -1
- novel_downloader/core/downloaders/linovelib.py +5 -1
- novel_downloader/core/downloaders/qianbi.py +291 -4
- novel_downloader/core/downloaders/qidian.py +199 -285
- novel_downloader/core/downloaders/registry.py +67 -0
- novel_downloader/core/downloaders/sfacg.py +5 -1
- novel_downloader/core/downloaders/yamibo.py +5 -1
- novel_downloader/core/exporters/__init__.py +10 -11
- novel_downloader/core/exporters/base.py +87 -7
- novel_downloader/core/exporters/biquge.py +5 -8
- novel_downloader/core/exporters/common/__init__.py +2 -2
- novel_downloader/core/exporters/common/epub.py +82 -166
- novel_downloader/core/exporters/common/main_exporter.py +0 -60
- novel_downloader/core/exporters/common/txt.py +82 -83
- novel_downloader/core/exporters/epub_util.py +157 -1330
- novel_downloader/core/exporters/esjzone.py +5 -8
- novel_downloader/core/exporters/linovelib/__init__.py +2 -2
- novel_downloader/core/exporters/linovelib/epub.py +157 -212
- novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
- novel_downloader/core/exporters/linovelib/txt.py +67 -63
- novel_downloader/core/exporters/qianbi.py +5 -8
- novel_downloader/core/exporters/qidian.py +14 -4
- novel_downloader/core/exporters/registry.py +53 -0
- novel_downloader/core/exporters/sfacg.py +5 -8
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/exporters/yamibo.py +5 -8
- novel_downloader/core/fetchers/__init__.py +19 -24
- novel_downloader/core/fetchers/base/__init__.py +3 -3
- novel_downloader/core/fetchers/base/browser.py +23 -4
- novel_downloader/core/fetchers/base/session.py +30 -5
- novel_downloader/core/fetchers/biquge/__init__.py +3 -3
- novel_downloader/core/fetchers/biquge/browser.py +5 -0
- novel_downloader/core/fetchers/biquge/session.py +6 -1
- novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
- novel_downloader/core/fetchers/esjzone/browser.py +5 -0
- novel_downloader/core/fetchers/esjzone/session.py +6 -1
- novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
- novel_downloader/core/fetchers/linovelib/browser.py +6 -1
- novel_downloader/core/fetchers/linovelib/session.py +6 -1
- novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
- novel_downloader/core/fetchers/qianbi/browser.py +5 -0
- novel_downloader/core/fetchers/qianbi/session.py +5 -0
- novel_downloader/core/fetchers/qidian/__init__.py +3 -3
- novel_downloader/core/fetchers/qidian/browser.py +12 -4
- novel_downloader/core/fetchers/qidian/session.py +11 -3
- novel_downloader/core/fetchers/registry.py +71 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
- novel_downloader/core/fetchers/sfacg/browser.py +5 -0
- novel_downloader/core/fetchers/sfacg/session.py +5 -0
- novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
- novel_downloader/core/fetchers/yamibo/browser.py +5 -0
- novel_downloader/core/fetchers/yamibo/session.py +6 -1
- novel_downloader/core/interfaces/__init__.py +7 -5
- novel_downloader/core/interfaces/searcher.py +18 -0
- novel_downloader/core/parsers/__init__.py +10 -11
- novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
- novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
- novel_downloader/core/parsers/qidian/main_parser.py +10 -21
- novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/registry.py +68 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
- novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
- novel_downloader/core/searchers/__init__.py +20 -0
- novel_downloader/core/searchers/base.py +92 -0
- novel_downloader/core/searchers/biquge.py +83 -0
- novel_downloader/core/searchers/esjzone.py +84 -0
- novel_downloader/core/searchers/qianbi.py +131 -0
- novel_downloader/core/searchers/qidian.py +87 -0
- novel_downloader/core/searchers/registry.py +63 -0
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +12 -4
- novel_downloader/models/__init__.py +4 -30
- novel_downloader/models/config.py +12 -6
- novel_downloader/models/search.py +16 -0
- novel_downloader/models/types.py +0 -2
- novel_downloader/resources/config/settings.toml +31 -4
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/utils/__init__.py +52 -0
- novel_downloader/utils/chapter_storage.py +244 -224
- novel_downloader/utils/constants.py +1 -21
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +77 -0
- novel_downloader/utils/epub/documents.py +403 -0
- novel_downloader/utils/epub/models.py +134 -0
- novel_downloader/utils/epub/utils.py +212 -0
- novel_downloader/utils/file_utils/__init__.py +10 -14
- novel_downloader/utils/file_utils/io.py +20 -51
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -3
- novel_downloader/utils/fontocr/__init__.py +5 -5
- novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
- novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +13 -1
- novel_downloader/utils/fontocr/ocr_v2.py +13 -1
- novel_downloader/utils/fontocr/ocr_v3.py +744 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +2 -0
- novel_downloader/utils/network.py +110 -251
- novel_downloader/utils/state.py +1 -0
- novel_downloader/utils/text_utils/__init__.py +18 -17
- novel_downloader/utils/text_utils/diff_display.py +4 -5
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +3 -3
- novel_downloader/utils/time_utils/datetime_utils.py +4 -5
- novel_downloader/utils/time_utils/sleep_utils.py +2 -3
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
- novel_downloader-1.5.0.dist-info/RECORD +164 -0
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/common/browser.py +0 -79
- novel_downloader/core/fetchers/common/session.py +0 -79
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.4.dist-info/RECORD +0 -165
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,131 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.qianbi
|
4
|
+
-----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import re
|
10
|
+
|
11
|
+
from lxml import html
|
12
|
+
|
13
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
14
|
+
from novel_downloader.core.searchers.registry import register_searcher
|
15
|
+
from novel_downloader.models import SearchResult
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
|
20
|
+
@register_searcher(
    site_keys=["qianbi"],
)
class QianbiSearcher(BaseSearcher):
    """
    Searcher for the Qianbi site (www.23qb.com).

    A response is treated as a single-book detail page when it contains an
    ``og:url`` meta tag, and as a multi-item result list otherwise.
    """

    site_name = "qianbi"
    priority = 3
    SEARCH_URL = "https://www.23qb.com/search.html"

    @classmethod
    def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from Qianbi's search page.

        :param keyword: The search term to query on Qianbi.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        params = {"searchkey": keyword}
        try:
            response = cls._http_get(cls.SEARCH_URL, params=params)
            return response.text
        except Exception:
            # Best-effort: a failed request is logged with its traceback and
            # reported to the caller as "no HTML".
            logger.exception(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from Qianbi search results into list of SearchResult.

        :param html_str: Raw HTML string from Qianbi search results page.
        :param limit: Maximum number of results to return, or None for all.
        :return: List of SearchResult dicts; empty when there is nothing to parse.
        """
        # `_fetch_html` returns "" on failure, and lxml raises on an empty
        # document, so bail out before handing the text to the parser.
        if not html_str or not html_str.strip():
            return []
        # A non-positive limit can never yield results on either page layout.
        if limit is not None and limit <= 0:
            return []
        if '<meta property="og:url"' in html_str:
            return cls._parse_detail_html(html_str)
        return cls._parse_search_list_html(html_str, limit)

    @classmethod
    def _parse_detail_html(cls, html_str: str) -> list[SearchResult]:
        """
        Parse a single-book detail page, detected via <meta property="og:url">.

        :param html_str: Raw HTML of the book detail page.
        :return: A single-element list with the book's SearchResult, or an
                 empty list when no book ID can be extracted.
        """
        doc = html.fromstring(html_str)
        url = doc.xpath('//meta[@property="og:url"]/@content')
        if not url:
            return []

        # The book ID is the numeric path segment after "/book/".
        m = re.search(r"/book/(\d+)/", url[0])
        if m is None:
            # A result without a book ID cannot be acted on, so drop it.
            return []
        book_id = m.group(1)

        # Title comes from <h1 class="page-title">; author from the title
        # attribute of the first link pointing at an "/author/" URL.
        title = (doc.xpath('//h1[@class="page-title"]/text()') or [""])[0].strip()
        author = (doc.xpath('//a[contains(@href,"/author/")]/@title') or [""])[
            0
        ].strip()

        return [
            SearchResult(
                site=cls.site_name,
                book_id=book_id,
                title=title,
                author=author,
                priority=cls.priority,
            )
        ]

    @classmethod
    def _parse_search_list_html(
        cls, html_str: str, limit: int | None
    ) -> list[SearchResult]:
        """
        Parse a multi-item search result page.

        Items that lack a title link or a book ID are skipped, so one malformed
        entry no longer discards the whole page.

        :param html_str: Raw HTML of the search-results page.
        :param limit: Maximum number of items to inspect, or None for all.
        :return: List of SearchResult.
        """
        if not html_str or not html_str.strip():
            return []

        doc = html.fromstring(html_str)
        items = doc.xpath('//div[contains(@class,"module-search-item")]')
        results: list[SearchResult] = []

        for idx, item in enumerate(items):
            if limit is not None and idx >= limit:
                break

            links = item.xpath('.//div[@class="novel-info-header"]/h3/a')
            if not links:
                continue
            link = links[0]

            title = link.text_content().strip()
            # The book ID is what remains of the href once the "book/" prefix
            # and the surrounding slashes are removed.
            href = link.get("href", "").strip("/")
            book_id = href.replace("book/", "").strip("/")
            if not book_id:
                continue

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    title=title,
                    # Author is not present on the result-list page.
                    author="",
                    # Later positions on the page get a larger priority value.
                    priority=cls.priority + idx,
                )
            )
        return results
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.qidian
|
4
|
+
--------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
|
10
|
+
from lxml import html
|
11
|
+
|
12
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
13
|
+
from novel_downloader.models import SearchResult
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
# @register_searcher(
|
19
|
+
# site_keys=["qidian", "qd"],
|
20
|
+
# )
|
21
|
+
class QidianSearcher(BaseSearcher):
    """
    Searcher for the Qidian site (www.qidian.com).

    TODO: the site currently redirects requests that carry no cookie.
    """

    site_name = "qidian"
    priority = 0
    SEARCH_URL = "https://www.qidian.com/so/{query}.html"

    @classmethod
    def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch raw HTML from Qidian's search page.

        :param keyword: The search term to query on Qidian.
        :return: HTML text of the search results page, or an empty string on fail.
        """
        # The keyword is embedded in the URL path, so it is quoted first.
        url = cls.SEARCH_URL.format(query=cls._quote(keyword))
        try:
            response = cls._http_get(url)
            return response.text
        except Exception:
            # Best-effort: a failed request is logged with its traceback and
            # reported to the caller as "no HTML".
            logger.exception(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                url,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse raw HTML from Qidian search results into list of SearchResult.

        Items without a ``data-bid`` attribute or without a title link are
        skipped, so one malformed entry no longer discards the whole page.

        :param html_str: Raw HTML string from Qidian search results page.
        :param limit: Maximum number of items to inspect, or None for all.
        :return: List of SearchResult dicts; empty when there is nothing to parse.
        """
        # `_fetch_html` returns "" on failure, and lxml raises on an empty
        # document, so bail out before handing the text to the parser.
        if not html_str or not html_str.strip():
            return []

        doc = html.fromstring(html_str)
        items = doc.xpath(
            '//div[@id="result-list"]//li[contains(@class, "res-book-item")]'
        )
        results: list[SearchResult] = []

        for idx, item in enumerate(items):
            if limit is not None and idx >= limit:
                break

            # `get` returns None when the attribute is missing, but
            # SearchResult.book_id is declared as str.
            book_id = (item.get("data-bid") or "").strip()
            title_nodes = item.xpath('.//h3[@class="book-info-title"]/a')
            if not book_id or not title_nodes:
                continue
            title = title_nodes[0].text_content().strip()

            # The author is taken from an <a class="name"> or an <i> element
            # inside <p class="author">, whichever comes first.
            author_nodes = item.xpath(
                './/p[@class="author"]/a[@class="name"] | .//p[@class="author"]/i'
            )
            author = author_nodes[0].text_content().strip() if author_nodes else ""

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    title=title,
                    author=author,
                    # Later positions on the page get a larger priority value.
                    priority=cls.priority + idx,
                )
            )
        return results
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.searchers.registry
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
__all__ = ["register_searcher", "search"]
|
9
|
+
|
10
|
+
from collections.abc import Callable, Sequence
|
11
|
+
from typing import TypeVar
|
12
|
+
|
13
|
+
from novel_downloader.core.searchers.base import BaseSearcher
|
14
|
+
from novel_downloader.models import SearchResult
|
15
|
+
|
16
|
+
S = TypeVar("S", bound=BaseSearcher)
|
17
|
+
|
18
|
+
_SEARCHER_REGISTRY: dict[str, type[BaseSearcher]] = {}
|
19
|
+
|
20
|
+
|
21
|
+
def register_searcher(
    site_keys: Sequence[str],
) -> Callable[[type[S]], type[S]]:
    """
    Build a class decorator that records a searcher in the registry.

    :param site_keys: Every key under which the decorated class is stored;
                      an existing entry for the same key is overwritten.
    :return: A decorator that registers the class and returns it unchanged.
    """

    def decorator(cls: type[S]) -> type[S]:
        # Map each requested key to the same class object in one step.
        _SEARCHER_REGISTRY.update(dict.fromkeys(site_keys, cls))
        return cls

    return decorator
|
34
|
+
|
35
|
+
|
36
|
+
def search(
    keyword: str,
    sites: Sequence[str] | None = None,
    limit: int | None = None,
    per_site_limit: int = 5,
) -> list[SearchResult]:
    """
    Perform a search for the given keyword across one or more registered sites,
    then aggregate and sort the results by their `priority` value.

    Searchers are called in the order of the requested keys (registration
    order when `sites` is empty or None). Because the final sort is stable,
    results with equal priority keep that order from run to run.

    :param keyword: The search term or keyword to query.
    :param sites: An optional sequence of site keys to limit which searchers
                  are used; unknown keys are ignored.
    :param limit: Maximum total number of results to return; if None, return all.
    :param per_site_limit: Maximum number of search results per site.
    :return: A flat list of `SearchResult` objects, lowest priority value first.
    """
    # Imported locally because this module does not import logging at the top.
    import logging

    log = logging.getLogger(__name__)

    keys = list(sites or _SEARCHER_REGISTRY)
    # dict.fromkeys removes classes registered under several keys while keeping
    # first-seen order; a set would make the call order vary between runs.
    to_call = dict.fromkeys(
        _SEARCHER_REGISTRY[key] for key in keys if key in _SEARCHER_REGISTRY
    )

    results: list[SearchResult] = []
    for cls in to_call:
        try:
            results.extend(cls.search(keyword, limit=per_site_limit))
        except Exception:
            # One failing site must not abort the whole search, but the
            # failure is logged instead of being silently discarded.
            log.warning(
                "Search failed for site '%s'",
                getattr(cls, "site_name", cls.__name__),
                exc_info=True,
            )

    results.sort(key=lambda res: res["priority"])
    return results[:limit] if limit is not None else results
|
novel_downloader/locales/en.json
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
"settings_help": "Configure downloader settings.",
|
9
9
|
"settings_set_lang_help": "Switch language between Chinese and English.",
|
10
10
|
"settings_set_config_help": "Set and save a custom YAML configuration file.",
|
11
|
-
"settings_update_rules_help": "Update site rules from a TOML/YAML/JSON file.",
|
12
11
|
"settings_init_help": "Initialize default config and rule files in the current directory.",
|
13
12
|
"settings_init_force_help": "Force overwrite if file already exists.",
|
14
13
|
"settings_init_exists": "File already exists: {filename}",
|
@@ -20,8 +19,6 @@
|
|
20
19
|
"settings_set_lang": "Language switched to {lang}",
|
21
20
|
"settings_set_config": "Configuration file saved from {path}",
|
22
21
|
"settings_set_config_fail": "Failed to save config file: {err}",
|
23
|
-
"settings_update_rules": "Site rules updated from {path}",
|
24
|
-
"settings_update_rules_fail": "Failed to update site rules: {err}",
|
25
22
|
"settings_set_cookies_help": "Set cookies for a specific site.",
|
26
23
|
"settings_set_cookies_prompt_site": "Site identifier (e.g. 'qidian')",
|
27
24
|
"settings_set_cookies_prompt_payload": "Cookie payload (JSON or 'k=v; k2=v2')",
|
@@ -61,10 +58,12 @@
|
|
61
58
|
"download_site_mode": "Mode: {mode}",
|
62
59
|
"download_no_ids": "No book IDs provided. Exiting.",
|
63
60
|
"download_fail_get_ids": "Failed to get book IDs from config: {err}",
|
61
|
+
"download_config_load_fail": "Failed to load config: {err}",
|
64
62
|
"download_only_example": "Only example book IDs found (e.g. '{example}').",
|
65
63
|
"download_edit_config": "Please edit your config and replace them with real book IDs.",
|
66
64
|
"download_downloading": "Downloading book {book_id} from {site}...",
|
67
65
|
"download_prompt_parse": "Parse...",
|
66
|
+
"download_login_failed": "Download login failed: please check your cookies or account credentials and try again.",
|
68
67
|
"download_book_ids": "One or more book IDs to process",
|
69
68
|
"download_option_start": "Start chapter ID (applies to the first book ID only)",
|
70
69
|
"download_option_end": "End chapter ID (applies to the first book ID only)",
|
@@ -114,5 +113,14 @@
|
|
114
113
|
"export_success_txt": "Successfully exported {book_id} as TXT.",
|
115
114
|
"export_failed_txt": "Failed to export {book_id} as TXT: {err}",
|
116
115
|
"export_success_epub": "Successfully exported {book_id} as EPUB.",
|
117
|
-
"export_failed_epub": "Failed to export {book_id} as EPUB: {err}"
|
116
|
+
"export_failed_epub": "Failed to export {book_id} as EPUB: {err}",
|
117
|
+
|
118
|
+
"help_search": "search for a book on one or more sites",
|
119
|
+
"help_search_sites": "which site keys to search (default: all)",
|
120
|
+
"help_search_keyword": "keyword to look for",
|
121
|
+
"help_search_limit": "Maximum number of search results",
|
122
|
+
"help_search_site_limit": "Maximum number of search results per site",
|
123
|
+
"no_results": "No results found.",
|
124
|
+
"prompt_select_index": "Select a result by number (or press Enter to cancel): ",
|
125
|
+
"invalid_selection": "Invalid choice, please try again."
|
118
126
|
}
|
novel_downloader/locales/zh.json
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
"settings_help": "配置下载器设置",
|
9
9
|
"settings_set_lang_help": "在中文和英文之间切换语言",
|
10
10
|
"settings_set_config_help": "设置并保存自定义 YAML 配置文件",
|
11
|
-
"settings_update_rules_help": "从 TOML/YAML/JSON 文件更新站点规则",
|
12
11
|
"settings_init_help": "在当前目录初始化默认配置和规则文件",
|
13
12
|
"settings_init_force_help": "如果文件已存在则强制覆盖",
|
14
13
|
"settings_init_exists": "文件已存在: {filename}",
|
@@ -20,8 +19,6 @@
|
|
20
19
|
"settings_set_lang": "语言已切换为 {lang}",
|
21
20
|
"settings_set_config": "已从 {path} 保存配置文件",
|
22
21
|
"settings_set_config_fail": "保存配置文件失败: {err}",
|
23
|
-
"settings_update_rules": "已从 {path} 更新站点规则",
|
24
|
-
"settings_update_rules_fail": "更新站点规则失败: {err}",
|
25
22
|
"settings_set_cookies_help": "为特定站点设置 Cookie",
|
26
23
|
"settings_set_cookies_prompt_site": "站点标识 (例如 'qidian')",
|
27
24
|
"settings_set_cookies_prompt_payload": "Cookie 内容 (JSON 或 'k=v; k2=v2')",
|
@@ -61,9 +58,11 @@
|
|
61
58
|
"download_site_mode": "使用模式: {mode}",
|
62
59
|
"download_no_ids": "未提供书籍 ID, 正在退出",
|
63
60
|
"download_fail_get_ids": "从配置获取书籍 ID 失败: {err}",
|
61
|
+
"download_config_load_fail": "加载配置失败: {err}",
|
64
62
|
"download_only_example": "只发现示例书籍 ID (例如 '{example}')",
|
65
63
|
"download_edit_config": "请编辑配置并将示例 ID 替换为真实书籍 ID",
|
66
64
|
"download_downloading": "正在从 {site} 下载书籍 {book_id}...",
|
65
|
+
"download_login_failed": "登录失败: 请检查您的 Cookie 或账户信息后重试",
|
67
66
|
"download_prompt_parse": "结束...",
|
68
67
|
"download_book_ids": "要处理的一个或多个小说 ID",
|
69
68
|
"download_option_start": "起始章节 ID (仅用于第一个书籍 ID)",
|
@@ -114,5 +113,14 @@
|
|
114
113
|
"export_success_txt": "成功将 {book_id} 导出为 TXT。",
|
115
114
|
"export_failed_txt": "导出 {book_id} 为 TXT 失败: {err}",
|
116
115
|
"export_success_epub": "成功将 {book_id} 导出为 EPUB",
|
117
|
-
"export_failed_epub": "导出 {book_id} 为 EPUB 失败: {err}"
|
116
|
+
"export_failed_epub": "导出 {book_id} 为 EPUB 失败: {err}",
|
117
|
+
|
118
|
+
"help_search": "在一个或多个站点搜索书籍",
|
119
|
+
"help_search_sites": "要搜索的站点键 (默认为全部)",
|
120
|
+
"help_search_keyword": "要搜索的关键字",
|
121
|
+
"help_search_limit": "总体搜索结果数量上限",
|
122
|
+
"help_search_site_limit": "单站点搜索结果数量上限",
|
123
|
+
"no_results": "未找到结果",
|
124
|
+
"prompt_select_index": "通过编号选择结果 (或按回车取消): ",
|
125
|
+
"invalid_selection": "无效选择, 请重试。"
|
118
126
|
}
|
@@ -5,7 +5,6 @@ novel_downloader.models
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
-
from .browser import NewContextOptions
|
9
8
|
from .chapter import ChapterDict
|
10
9
|
from .config import (
|
11
10
|
BookConfig,
|
@@ -13,54 +12,29 @@ from .config import (
|
|
13
12
|
ExporterConfig,
|
14
13
|
FetcherConfig,
|
15
14
|
ParserConfig,
|
15
|
+
TextCleanerConfig,
|
16
16
|
)
|
17
17
|
from .login import LoginField
|
18
|
-
from .
|
19
|
-
BookInfoRules,
|
20
|
-
FieldRules,
|
21
|
-
RuleStep,
|
22
|
-
SiteProfile,
|
23
|
-
SiteRules,
|
24
|
-
SiteRulesDict,
|
25
|
-
VolumesRules,
|
26
|
-
)
|
27
|
-
from .tasks import (
|
28
|
-
CidTask,
|
29
|
-
HtmlTask,
|
30
|
-
RestoreTask,
|
31
|
-
)
|
18
|
+
from .search import SearchResult
|
32
19
|
from .types import (
|
33
20
|
BrowserType,
|
34
21
|
LogLevel,
|
35
22
|
ModeType,
|
36
|
-
SaveMode,
|
37
23
|
SplitMode,
|
38
|
-
StorageBackend,
|
39
24
|
)
|
40
25
|
|
41
26
|
__all__ = [
|
42
|
-
"NewContextOptions",
|
43
27
|
"BookConfig",
|
44
28
|
"DownloaderConfig",
|
45
29
|
"ParserConfig",
|
46
30
|
"FetcherConfig",
|
47
31
|
"ExporterConfig",
|
32
|
+
"TextCleanerConfig",
|
48
33
|
"ChapterDict",
|
49
34
|
"LoginField",
|
35
|
+
"SearchResult",
|
50
36
|
"BrowserType",
|
51
37
|
"ModeType",
|
52
|
-
"SaveMode",
|
53
|
-
"StorageBackend",
|
54
38
|
"SplitMode",
|
55
39
|
"LogLevel",
|
56
|
-
"BookInfoRules",
|
57
|
-
"FieldRules",
|
58
|
-
"RuleStep",
|
59
|
-
"SiteProfile",
|
60
|
-
"SiteRules",
|
61
|
-
"SiteRulesDict",
|
62
|
-
"VolumesRules",
|
63
|
-
"CidTask",
|
64
|
-
"HtmlTask",
|
65
|
-
"RestoreTask",
|
66
40
|
]
|
@@ -16,14 +16,13 @@ These models are used to map loaded YAML or JSON config data into
|
|
16
16
|
strongly typed Python objects for safer and cleaner access.
|
17
17
|
"""
|
18
18
|
|
19
|
-
from dataclasses import dataclass
|
19
|
+
from dataclasses import dataclass, field
|
20
20
|
from typing import NotRequired, TypedDict
|
21
21
|
|
22
22
|
from .types import (
|
23
23
|
BrowserType,
|
24
24
|
ModeType,
|
25
25
|
SplitMode,
|
26
|
-
StorageBackend,
|
27
26
|
)
|
28
27
|
|
29
28
|
|
@@ -52,13 +51,11 @@ class DownloaderConfig:
|
|
52
51
|
backoff_factor: float = 2.0
|
53
52
|
raw_data_dir: str = "./raw_data"
|
54
53
|
cache_dir: str = "./novel_cache"
|
55
|
-
|
56
|
-
parser_workers: int = 4
|
54
|
+
workers: int = 4
|
57
55
|
skip_existing: bool = True
|
58
56
|
login_required: bool = False
|
59
57
|
save_html: bool = False
|
60
58
|
mode: ModeType = "session"
|
61
|
-
storage_backend: StorageBackend = "json"
|
62
59
|
storage_batch_size: int = 1
|
63
60
|
username: str = ""
|
64
61
|
password: str = ""
|
@@ -83,12 +80,20 @@ class ParserConfig:
|
|
83
80
|
mode: ModeType = "session"
|
84
81
|
|
85
82
|
|
83
|
+
@dataclass
class TextCleanerConfig:
    """
    Text-cleaning options for chapter titles and chapter content.

    Every collection field defaults to a fresh empty container, so instances
    never share mutable state.

    NOTE(review): presumably populated from the ``[cleaner]`` section of
    ``settings.toml`` -- confirm against the config adapter.
    """

    # presumably toggles removal of invisible characters -- verify against
    # the text cleaner that consumes this config
    remove_invisible: bool = True
    # Patterns to remove from titles; the bundled settings.toml ships
    # regex-style examples -- TODO confirm they are applied as regexes.
    title_remove_patterns: list[str] = field(default_factory=list)
    # Replacement mapping for titles (presumably source text -> replacement).
    title_replacements: dict[str, str] = field(default_factory=dict)
    # Patterns to remove from chapter content; same caveat as for titles.
    content_remove_patterns: list[str] = field(default_factory=list)
    # Replacement mapping for content (presumably source text -> replacement).
    content_replacements: dict[str, str] = field(default_factory=dict)
|
90
|
+
|
91
|
+
|
86
92
|
@dataclass
|
87
93
|
class ExporterConfig:
|
88
94
|
cache_dir: str = "./novel_cache"
|
89
95
|
raw_data_dir: str = "./raw_data"
|
90
96
|
output_dir: str = "./downloads"
|
91
|
-
storage_backend: StorageBackend = "json"
|
92
97
|
clean_text: bool = True
|
93
98
|
make_txt: bool = True
|
94
99
|
make_epub: bool = False
|
@@ -100,6 +105,7 @@ class ExporterConfig:
|
|
100
105
|
include_toc: bool = False
|
101
106
|
include_picture: bool = False
|
102
107
|
split_mode: SplitMode = "book"
|
108
|
+
cleaner_cfg: TextCleanerConfig = field(default_factory=TextCleanerConfig)
|
103
109
|
|
104
110
|
|
105
111
|
class BookConfig(TypedDict):
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.models.search
|
4
|
+
------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import TypedDict
|
9
|
+
|
10
|
+
|
11
|
+
class SearchResult(TypedDict):
    """
    One search hit returned by a site searcher.

    All keys are required (TypedDict totality is on by default).
    """

    # Name of the site the hit came from.
    site: str
    # Identifier of the book on that site.
    book_id: str
    # Book title as shown in the search results.
    title: str
    # Author name; searchers use "" when the page does not provide one.
    author: str
    # Sort key used when aggregating results; lower values sort first.
    priority: int
|
novel_downloader/models/types.py
CHANGED
@@ -8,8 +8,6 @@ novel_downloader.models.types
|
|
8
8
|
from typing import Literal
|
9
9
|
|
10
10
|
ModeType = Literal["browser", "session"]
|
11
|
-
SaveMode = Literal["overwrite", "skip"]
|
12
|
-
StorageBackend = Literal["json", "sqlite"]
|
13
11
|
SplitMode = Literal["book", "volume"]
|
14
12
|
LogLevel = Literal["DEBUG", "INFO", "WARNING", "ERROR"]
|
15
13
|
BrowserType = Literal["chromium", "firefox", "webkit"]
|
@@ -21,11 +21,9 @@ request_interval = 2.0 # 同一本书各章节请求间隔 (秒)
|
|
21
21
|
raw_data_dir = "./raw_data" # 原始章节 JSON/DB 存放目录
|
22
22
|
output_dir = "./downloads" # 最终输出文件存放目录
|
23
23
|
cache_dir = "./novel_cache" # 本地缓存目录 (字体 / 图片等)
|
24
|
-
|
25
|
-
parser_workers = 2 # 并发解析线程数
|
24
|
+
workers = 2 # 工作协程数
|
26
25
|
skip_existing = true # 是否跳过已存在章节
|
27
|
-
|
28
|
-
storage_batch_size = 30 # SQLite 批量提交的章节数量
|
26
|
+
storage_batch_size = 1 # SQLite 批量提交的章节数量
|
29
27
|
|
30
28
|
[general.debug]
|
31
29
|
save_html = false # 是否将抓取到的原始 HTML 保留到磁盘
|
@@ -129,3 +127,32 @@ filename_template = "{title}_{author}" # 文件命名规则
|
|
129
127
|
include_cover = true # 是否在 EPUB 中包含封面
|
130
128
|
include_toc = false # 是否自动生成目录
|
131
129
|
include_picture = false # 是否下载章节图片 (体积较大)
|
130
|
+
|
131
|
+
[cleaner]
|
132
|
+
remove_invisible = true
|
133
|
+
|
134
|
+
[cleaner.title]
|
135
|
+
remove_patterns = [
|
136
|
+
'【[^】]*?】',
|
137
|
+
'[((][^()()]*?求票[^()()]*?[))]',
|
138
|
+
]
|
139
|
+
|
140
|
+
[cleaner.title.replace]
|
141
|
+
':' = ':'
|
142
|
+
|
143
|
+
[cleaner.title.external]
|
144
|
+
enabled = false
|
145
|
+
remove_patterns = "path/to/title-remove.json"
|
146
|
+
replace = "path/to/title-replace.json"
|
147
|
+
|
148
|
+
[cleaner.content]
|
149
|
+
remove_patterns = []
|
150
|
+
|
151
|
+
[cleaner.content.replace]
|
152
|
+
'li子' = '例子'
|
153
|
+
'pinbi词' = '屏蔽词'
|
154
|
+
|
155
|
+
[cleaner.content.external]
|
156
|
+
enabled = false
|
157
|
+
remove_patterns = "path/to/content-remove.json"
|
158
|
+
replace = "path/to/content-replace.json"
|
@@ -0,0 +1,83 @@
|
|
1
|
+
/* Base page layout. */
body {
  font-family: serif;
  line-height: 1.5;
  height: 95%;
  margin: 2em;
}

h1 {
  font-size: 2em;
  text-align: center;
  margin-bottom: 1em;
}

/* Zero-size paragraph that forces a page break after itself. */
p.new-page-after {
  page-break-after: always;
  margin: 0;
  padding: 0;
}

/* Book info list, rendered without bullets. */
.intro-info {
  list-style: none;
  padding: 0;
  margin: 0 0 2em;
}
.intro-info li {
  margin: 0.5em 0;
  font-size: 1.1em;
}

.intro-summary {
  margin-top: 1em;
  font-size: 1em;
}

/* Volume header: children stacked vertically and centred. */
.vol-header {
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  margin: 0;
}

.vol-border {
  width: 100%;
  text-align: center;
  margin: 0 auto 0 auto;
  text-indent: 0em;
}
/* Rotates the border by 180 degrees; the standard property comes last so it
   takes precedence wherever it is supported. */
.vol-border.flip {
  -webkit-transform: rotate(180deg);
  -moz-transform: rotate(180deg);
  -ms-transform: rotate(180deg);
  -o-transform: rotate(180deg);
  transform: rotate(180deg);
}

/* Shared styling for the main and secondary volume titles. */
.vol-title-main,
.vol-title-sub {
  font-weight: bold;
  text-align: center;
  text-indent: 0em;
  color: #6e471c;
  margin: 0.25em 0;
  display: block;
  text-shadow: 1px 1px 2px rgba(255,255,255,0.8);
}
.vol-title-main {
  font-size: 1.25rem;
}
.vol-title-sub {
  font-size: 1rem;
}

.volume-intro-text {
  margin: 1em 0;
  padding: 0 1em;
  text-align: justify;
}
.volume-intro-text p {
  margin: 0.5em 0;
  line-height: 1.6;
}
|