novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -2
- novel_downloader/cli/config.py +1 -83
- novel_downloader/cli/download.py +4 -5
- novel_downloader/cli/export.py +4 -1
- novel_downloader/cli/main.py +2 -0
- novel_downloader/cli/search.py +123 -0
- novel_downloader/config/__init__.py +3 -10
- novel_downloader/config/adapter.py +190 -54
- novel_downloader/config/loader.py +2 -3
- novel_downloader/core/__init__.py +13 -13
- novel_downloader/core/downloaders/__init__.py +10 -11
- novel_downloader/core/downloaders/base.py +152 -26
- novel_downloader/core/downloaders/biquge.py +5 -1
- novel_downloader/core/downloaders/common.py +157 -378
- novel_downloader/core/downloaders/esjzone.py +5 -1
- novel_downloader/core/downloaders/linovelib.py +5 -1
- novel_downloader/core/downloaders/qianbi.py +291 -4
- novel_downloader/core/downloaders/qidian.py +199 -285
- novel_downloader/core/downloaders/registry.py +67 -0
- novel_downloader/core/downloaders/sfacg.py +5 -1
- novel_downloader/core/downloaders/yamibo.py +5 -1
- novel_downloader/core/exporters/__init__.py +10 -11
- novel_downloader/core/exporters/base.py +87 -7
- novel_downloader/core/exporters/biquge.py +5 -8
- novel_downloader/core/exporters/common/__init__.py +2 -2
- novel_downloader/core/exporters/common/epub.py +82 -166
- novel_downloader/core/exporters/common/main_exporter.py +0 -60
- novel_downloader/core/exporters/common/txt.py +82 -83
- novel_downloader/core/exporters/epub_util.py +157 -1330
- novel_downloader/core/exporters/esjzone.py +5 -8
- novel_downloader/core/exporters/linovelib/__init__.py +2 -2
- novel_downloader/core/exporters/linovelib/epub.py +157 -212
- novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
- novel_downloader/core/exporters/linovelib/txt.py +67 -63
- novel_downloader/core/exporters/qianbi.py +5 -8
- novel_downloader/core/exporters/qidian.py +14 -4
- novel_downloader/core/exporters/registry.py +53 -0
- novel_downloader/core/exporters/sfacg.py +5 -8
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/exporters/yamibo.py +5 -8
- novel_downloader/core/fetchers/__init__.py +19 -24
- novel_downloader/core/fetchers/base/__init__.py +3 -3
- novel_downloader/core/fetchers/base/browser.py +23 -4
- novel_downloader/core/fetchers/base/session.py +30 -5
- novel_downloader/core/fetchers/biquge/__init__.py +3 -3
- novel_downloader/core/fetchers/biquge/browser.py +5 -0
- novel_downloader/core/fetchers/biquge/session.py +6 -1
- novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
- novel_downloader/core/fetchers/esjzone/browser.py +5 -0
- novel_downloader/core/fetchers/esjzone/session.py +6 -1
- novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
- novel_downloader/core/fetchers/linovelib/browser.py +6 -1
- novel_downloader/core/fetchers/linovelib/session.py +6 -1
- novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
- novel_downloader/core/fetchers/qianbi/browser.py +5 -0
- novel_downloader/core/fetchers/qianbi/session.py +5 -0
- novel_downloader/core/fetchers/qidian/__init__.py +3 -3
- novel_downloader/core/fetchers/qidian/browser.py +12 -4
- novel_downloader/core/fetchers/qidian/session.py +11 -3
- novel_downloader/core/fetchers/registry.py +71 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
- novel_downloader/core/fetchers/sfacg/browser.py +5 -0
- novel_downloader/core/fetchers/sfacg/session.py +5 -0
- novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
- novel_downloader/core/fetchers/yamibo/browser.py +5 -0
- novel_downloader/core/fetchers/yamibo/session.py +6 -1
- novel_downloader/core/interfaces/__init__.py +7 -5
- novel_downloader/core/interfaces/searcher.py +18 -0
- novel_downloader/core/parsers/__init__.py +10 -11
- novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
- novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
- novel_downloader/core/parsers/qidian/main_parser.py +10 -21
- novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/registry.py +68 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
- novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
- novel_downloader/core/searchers/__init__.py +20 -0
- novel_downloader/core/searchers/base.py +92 -0
- novel_downloader/core/searchers/biquge.py +83 -0
- novel_downloader/core/searchers/esjzone.py +84 -0
- novel_downloader/core/searchers/qianbi.py +131 -0
- novel_downloader/core/searchers/qidian.py +87 -0
- novel_downloader/core/searchers/registry.py +63 -0
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +12 -4
- novel_downloader/models/__init__.py +4 -30
- novel_downloader/models/config.py +12 -6
- novel_downloader/models/search.py +16 -0
- novel_downloader/models/types.py +0 -2
- novel_downloader/resources/config/settings.toml +31 -4
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/utils/__init__.py +52 -0
- novel_downloader/utils/chapter_storage.py +244 -224
- novel_downloader/utils/constants.py +1 -21
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +77 -0
- novel_downloader/utils/epub/documents.py +403 -0
- novel_downloader/utils/epub/models.py +134 -0
- novel_downloader/utils/epub/utils.py +212 -0
- novel_downloader/utils/file_utils/__init__.py +10 -14
- novel_downloader/utils/file_utils/io.py +20 -51
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -3
- novel_downloader/utils/fontocr/__init__.py +5 -5
- novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
- novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +13 -1
- novel_downloader/utils/fontocr/ocr_v2.py +13 -1
- novel_downloader/utils/fontocr/ocr_v3.py +744 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +2 -0
- novel_downloader/utils/network.py +110 -251
- novel_downloader/utils/state.py +1 -0
- novel_downloader/utils/text_utils/__init__.py +18 -17
- novel_downloader/utils/text_utils/diff_display.py +4 -5
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +3 -3
- novel_downloader/utils/time_utils/datetime_utils.py +4 -5
- novel_downloader/utils/time_utils/sleep_utils.py +2 -3
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
- novel_downloader-1.5.0.dist-info/RECORD +164 -0
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/common/browser.py +0 -79
- novel_downloader/core/fetchers/common/session.py +0 -79
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.4.dist-info/RECORD +0 -165
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -3,22 +3,19 @@
|
|
3
3
|
novel_downloader.core.exporters.linovelib.txt
|
4
4
|
---------------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
This module defines `linovelib_export_as_txt` function, which assembles and formats
|
9
|
-
a novel based on metadata and chapter files found in the raw data directory.
|
10
|
-
It is intended to be used by `LinovelibExporter` as part of the save/export process.
|
6
|
+
Defines `linovelib_export_as_txt` to assemble and export a Linovelib novel
|
7
|
+
into a single `.txt` file. Intended for use by `LinovelibExporter`.
|
11
8
|
"""
|
12
9
|
|
13
10
|
from __future__ import annotations
|
14
11
|
|
15
|
-
import json
|
16
12
|
from typing import TYPE_CHECKING
|
17
13
|
|
18
|
-
from novel_downloader.
|
19
|
-
|
20
|
-
|
14
|
+
from novel_downloader.core.exporters.txt_util import (
|
15
|
+
build_txt_chapter,
|
16
|
+
build_txt_header,
|
21
17
|
)
|
18
|
+
from novel_downloader.utils import get_cleaner, save_as_txt
|
22
19
|
|
23
20
|
if TYPE_CHECKING:
|
24
21
|
from .main_exporter import LinovelibExporter
|
@@ -29,56 +26,69 @@ def linovelib_export_as_txt(
|
|
29
26
|
book_id: str,
|
30
27
|
) -> None:
|
31
28
|
"""
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
29
|
+
Export a novel as a single text file by merging all chapter data.
|
30
|
+
|
31
|
+
Steps:
|
32
|
+
1. Read metadata from `book_info.json`.
|
33
|
+
2. For each volume:
|
34
|
+
- Clean & append the volume title.
|
35
|
+
- Clean & append optional volume intro.
|
36
|
+
- Batch-fetch all chapters in this volume to minimize SQLite overhead.
|
37
|
+
- For each chapter: clean title & content, then append.
|
38
|
+
3. Build a header block with metadata.
|
39
|
+
4. Concatenate header + all chapter blocks, then save as `{book_name}.txt`.
|
40
|
+
|
41
|
+
:param exporter: The LinovelibExporter instance.
|
42
|
+
:param book_id: Identifier of the novel (subdirectory under raw data).
|
42
43
|
"""
|
43
44
|
TAG = "[exporter]"
|
44
45
|
# --- Paths & options ---
|
45
|
-
raw_base = exporter._raw_data_dir / book_id
|
46
46
|
out_dir = exporter.output_dir
|
47
47
|
out_dir.mkdir(parents=True, exist_ok=True)
|
48
|
+
cleaner = get_cleaner(
|
49
|
+
enabled=exporter._config.clean_text,
|
50
|
+
config=exporter._config.cleaner_cfg,
|
51
|
+
)
|
48
52
|
|
49
53
|
# --- Load book_info.json ---
|
50
|
-
|
51
|
-
|
52
|
-
info_text = info_path.read_text(encoding="utf-8")
|
53
|
-
book_info = json.loads(info_text)
|
54
|
-
except Exception as e:
|
55
|
-
exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
|
54
|
+
book_info = exporter._load_book_info(book_id)
|
55
|
+
if not book_info:
|
56
56
|
return
|
57
57
|
|
58
58
|
# --- Compile chapters ---
|
59
59
|
parts: list[str] = []
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
exporter.logger.info("%s Processing volume: %s", TAG, vol_name)
|
60
|
+
|
61
|
+
for vol in book_info.get("volumes", []):
|
62
|
+
vol_title = cleaner.clean_title(vol.get("volume_name", ""))
|
63
|
+
if vol_title:
|
64
|
+
parts.append(f"\n\n{'=' * 6} {vol_title} {'=' * 6}\n\n")
|
65
|
+
exporter.logger.info("%s Processing volume: %s", TAG, vol_title)
|
66
|
+
|
67
|
+
vol_intro = cleaner.clean_content(vol.get("volume_intro", ""))
|
69
68
|
if vol_intro:
|
70
69
|
parts.append(f"{vol_intro}\n\n")
|
71
|
-
|
72
|
-
|
73
|
-
|
70
|
+
|
71
|
+
# Batch-fetch chapters for this volume
|
72
|
+
chap_ids = [
|
73
|
+
chap.get("chapterId")
|
74
|
+
for chap in vol.get("chapters", [])
|
75
|
+
if chap.get("chapterId")
|
76
|
+
]
|
77
|
+
chap_map = exporter._get_chapters(book_id, chap_ids)
|
78
|
+
|
79
|
+
for chap_meta in vol.get("chapters", []):
|
80
|
+
chap_id = chap_meta.get("chapterId")
|
74
81
|
if not chap_id:
|
75
|
-
exporter.logger.warning(
|
82
|
+
exporter.logger.warning(
|
83
|
+
"%s Missing chapterId, skipping: %s", TAG, chap_meta
|
84
|
+
)
|
76
85
|
continue
|
77
86
|
|
78
|
-
|
79
|
-
|
87
|
+
chap_title = cleaner.clean_title(chap_meta.get("title", ""))
|
88
|
+
data = chap_map.get(chap_id)
|
89
|
+
if not data:
|
80
90
|
exporter.logger.info(
|
81
|
-
"%s Missing chapter
|
91
|
+
"%s Missing chapter: %s (%s), skipping.",
|
82
92
|
TAG,
|
83
93
|
chap_title,
|
84
94
|
chap_id,
|
@@ -86,33 +96,27 @@ def linovelib_export_as_txt(
|
|
86
96
|
continue
|
87
97
|
|
88
98
|
# Extract structured fields
|
89
|
-
title =
|
90
|
-
content =
|
99
|
+
title = cleaner.clean_title(data.get("title", chap_title))
|
100
|
+
content = cleaner.clean_content(data.get("content", ""))
|
91
101
|
|
92
|
-
parts.append(
|
102
|
+
parts.append(build_txt_chapter(title=title, paragraphs=content, extras={}))
|
93
103
|
|
94
104
|
# --- Build header ---
|
95
|
-
name = book_info.get("book_name")
|
96
|
-
author = book_info.get("author")
|
97
|
-
words = book_info.get("word_count")
|
98
|
-
updated = book_info.get("update_time")
|
99
|
-
summary = book_info.get("summary")
|
105
|
+
name = book_info.get("book_name") or ""
|
106
|
+
author = book_info.get("author") or ""
|
107
|
+
words = book_info.get("word_count") or ""
|
108
|
+
updated = book_info.get("update_time") or ""
|
109
|
+
summary = book_info.get("summary") or ""
|
100
110
|
|
101
|
-
|
111
|
+
header_fields = [
|
102
112
|
("书名", name),
|
103
113
|
("作者", author),
|
104
114
|
("总字数", words),
|
105
115
|
("更新日期", updated),
|
116
|
+
("内容简介", summary),
|
106
117
|
]
|
107
|
-
header_lines = [f"{label}: {value}" for label, value in fields if value]
|
108
|
-
|
109
|
-
if summary:
|
110
|
-
header_lines.append("内容简介:")
|
111
|
-
header_lines.append(summary)
|
112
|
-
|
113
|
-
header_lines += ["", "-" * 10, ""]
|
114
118
|
|
115
|
-
header =
|
119
|
+
header = build_txt_header(header_fields)
|
116
120
|
|
117
121
|
final_text = header + "\n\n" + "\n\n".join(parts).strip()
|
118
122
|
|
@@ -121,9 +125,9 @@ def linovelib_export_as_txt(
|
|
121
125
|
out_path = out_dir / out_name
|
122
126
|
|
123
127
|
# --- Save final text ---
|
124
|
-
|
125
|
-
|
128
|
+
result = save_as_txt(content=final_text, filepath=out_path)
|
129
|
+
if result:
|
126
130
|
exporter.logger.info("%s Novel saved to: %s", TAG, out_path)
|
127
|
-
|
128
|
-
exporter.logger.error("%s Failed to
|
131
|
+
else:
|
132
|
+
exporter.logger.error("%s Failed to write novel to %s", TAG, out_path)
|
129
133
|
return
|
@@ -5,21 +5,18 @@ novel_downloader.core.exporters.qianbi
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
+
__all__ = ["QianbiExporter"]
|
9
|
+
|
10
|
+
from novel_downloader.core.exporters.registry import register_exporter
|
8
11
|
from novel_downloader.models import ExporterConfig
|
9
12
|
|
10
13
|
from .common import CommonExporter
|
11
14
|
|
12
15
|
|
16
|
+
@register_exporter(site_keys=["qianbi"])
|
13
17
|
class QianbiExporter(CommonExporter):
|
14
18
|
def __init__(
|
15
19
|
self,
|
16
20
|
config: ExporterConfig,
|
17
21
|
):
|
18
|
-
super().__init__(
|
19
|
-
config,
|
20
|
-
site="qianbi",
|
21
|
-
chap_folders=["chapters"],
|
22
|
-
)
|
23
|
-
|
24
|
-
|
25
|
-
__all__ = ["QianbiExporter"]
|
22
|
+
super().__init__(config, site="qianbi")
|
@@ -8,12 +8,25 @@ of novels sourced from Qidian (起点中文网). It implements the platform-spec
|
|
8
8
|
logic required to structure and export novel content into desired formats.
|
9
9
|
"""
|
10
10
|
|
11
|
+
__all__ = ["QidianExporter"]
|
12
|
+
|
13
|
+
from novel_downloader.core.exporters.registry import register_exporter
|
11
14
|
from novel_downloader.models import ExporterConfig
|
12
15
|
|
13
16
|
from .common import CommonExporter
|
14
17
|
|
15
18
|
|
19
|
+
@register_exporter(site_keys=["qidian", "qd"])
|
16
20
|
class QidianExporter(CommonExporter):
|
21
|
+
""" """
|
22
|
+
|
23
|
+
DEFAULT_SOURCE_ID = 0
|
24
|
+
ENCRYPTED_SOURCE_ID = 1
|
25
|
+
PRIORITIES_MAP = {
|
26
|
+
DEFAULT_SOURCE_ID: 0,
|
27
|
+
ENCRYPTED_SOURCE_ID: 1,
|
28
|
+
}
|
29
|
+
|
17
30
|
def __init__(
|
18
31
|
self,
|
19
32
|
config: ExporterConfig,
|
@@ -21,8 +34,5 @@ class QidianExporter(CommonExporter):
|
|
21
34
|
super().__init__(
|
22
35
|
config,
|
23
36
|
site="qidian",
|
24
|
-
|
37
|
+
priorities=self.PRIORITIES_MAP,
|
25
38
|
)
|
26
|
-
|
27
|
-
|
28
|
-
__all__ = ["QidianExporter"]
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.exporters.registry
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
__all__ = ["register_exporter", "get_exporter"]
|
9
|
+
|
10
|
+
from collections.abc import Callable, Sequence
|
11
|
+
from typing import TypeVar
|
12
|
+
|
13
|
+
from novel_downloader.core.interfaces import ExporterProtocol
|
14
|
+
from novel_downloader.models import ExporterConfig
|
15
|
+
|
16
|
+
ExporterBuilder = Callable[[ExporterConfig], ExporterProtocol]
|
17
|
+
|
18
|
+
E = TypeVar("E", bound=ExporterProtocol)
|
19
|
+
_EXPORTER_MAP: dict[str, ExporterBuilder] = {}
|
20
|
+
|
21
|
+
|
22
|
+
def register_exporter(
|
23
|
+
site_keys: Sequence[str],
|
24
|
+
) -> Callable[[type[E]], type[E]]:
|
25
|
+
"""
|
26
|
+
Decorator to register a exporter class under given keys.
|
27
|
+
|
28
|
+
:param site_keys: Sequence of site identifiers
|
29
|
+
:return: A class decorator that populates _EXPORTER_MAP.
|
30
|
+
"""
|
31
|
+
|
32
|
+
def decorator(cls: type[E]) -> type[E]:
|
33
|
+
for key in site_keys:
|
34
|
+
_EXPORTER_MAP[key.lower()] = cls
|
35
|
+
return cls
|
36
|
+
|
37
|
+
return decorator
|
38
|
+
|
39
|
+
|
40
|
+
def get_exporter(site: str, config: ExporterConfig) -> ExporterProtocol:
|
41
|
+
"""
|
42
|
+
Returns a site-specific exporter instance.
|
43
|
+
|
44
|
+
:param site: Site name (e.g., 'qidian')
|
45
|
+
:param config: Configuration for the exporter
|
46
|
+
:return: An instance of a exporter class
|
47
|
+
"""
|
48
|
+
site_key = site.lower()
|
49
|
+
try:
|
50
|
+
exporter_cls = _EXPORTER_MAP[site_key]
|
51
|
+
except KeyError as err:
|
52
|
+
raise ValueError(f"Unsupported site: {site}") from err
|
53
|
+
return exporter_cls(config)
|
@@ -5,21 +5,18 @@ novel_downloader.core.exporters.sfacg
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
+
__all__ = ["SfacgExporter"]
|
9
|
+
|
10
|
+
from novel_downloader.core.exporters.registry import register_exporter
|
8
11
|
from novel_downloader.models import ExporterConfig
|
9
12
|
|
10
13
|
from .common import CommonExporter
|
11
14
|
|
12
15
|
|
16
|
+
@register_exporter(site_keys=["sfacg"])
|
13
17
|
class SfacgExporter(CommonExporter):
|
14
18
|
def __init__(
|
15
19
|
self,
|
16
20
|
config: ExporterConfig,
|
17
21
|
):
|
18
|
-
super().__init__(
|
19
|
-
config,
|
20
|
-
site="sfacg",
|
21
|
-
chap_folders=["chapters"],
|
22
|
-
)
|
23
|
-
|
24
|
-
|
25
|
-
__all__ = ["SfacgExporter"]
|
22
|
+
super().__init__(config, site="sfacg")
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.exporters.txt_util
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
Utilities for generating plain-text exports of novel content.
|
7
|
+
"""
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"build_txt_header",
|
11
|
+
"build_txt_chapter",
|
12
|
+
]
|
13
|
+
|
14
|
+
import re
|
15
|
+
|
16
|
+
_IMG_TAG_RE = re.compile(r"<img[^>]*>")
|
17
|
+
|
18
|
+
|
19
|
+
def build_txt_header(fields: list[tuple[str, str]]) -> str:
|
20
|
+
"""
|
21
|
+
Build a simple text header from label-value pairs, followed by a dashed separator.
|
22
|
+
|
23
|
+
:param fields: List of (label, value) pairs.
|
24
|
+
:return: A single string containing the formatted header.
|
25
|
+
"""
|
26
|
+
header_lines = [f"{label}: {value}" for label, value in fields if value]
|
27
|
+
header_lines += ["", "-" * 10, ""]
|
28
|
+
return "\n".join(header_lines)
|
29
|
+
|
30
|
+
|
31
|
+
def build_txt_chapter(
|
32
|
+
title: str,
|
33
|
+
paragraphs: str,
|
34
|
+
extras: dict[str, str] | None = None,
|
35
|
+
) -> str:
|
36
|
+
"""
|
37
|
+
Build a formatted chapter text block including title, body paragraphs,
|
38
|
+
and optional extra sections.
|
39
|
+
|
40
|
+
- Strips any `<img...>` tags from paragraphs.
|
41
|
+
- Title appears first (stripped of surrounding whitespace).
|
42
|
+
- Each non-blank line in `paragraphs` becomes its own paragraph.
|
43
|
+
|
44
|
+
:param title: Chapter title.
|
45
|
+
:param paragraphs: Raw multi-line string. Blank lines are ignored.
|
46
|
+
:param extras: Optional dict mapping section titles to multi-line strings.
|
47
|
+
:return: A string where title, paragraphs, and extras are joined by lines.
|
48
|
+
"""
|
49
|
+
parts: list[str] = [title.strip()]
|
50
|
+
|
51
|
+
# add each nonempty paragraph line
|
52
|
+
paragraphs = _IMG_TAG_RE.sub("", paragraphs)
|
53
|
+
for ln in paragraphs.splitlines():
|
54
|
+
line = ln.strip()
|
55
|
+
if line:
|
56
|
+
parts.append(line)
|
57
|
+
|
58
|
+
if extras:
|
59
|
+
for title, text in extras.items():
|
60
|
+
lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
|
61
|
+
if not lines:
|
62
|
+
continue
|
63
|
+
parts.append("---")
|
64
|
+
parts.append(title.strip())
|
65
|
+
parts.extend(lines)
|
66
|
+
|
67
|
+
return "\n\n".join(parts)
|
@@ -5,21 +5,18 @@ novel_downloader.core.exporters.yamibo
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
+
__all__ = ["YamiboExporter"]
|
9
|
+
|
10
|
+
from novel_downloader.core.exporters.registry import register_exporter
|
8
11
|
from novel_downloader.models import ExporterConfig
|
9
12
|
|
10
13
|
from .common import CommonExporter
|
11
14
|
|
12
15
|
|
16
|
+
@register_exporter(site_keys=["yamibo"])
|
13
17
|
class YamiboExporter(CommonExporter):
|
14
18
|
def __init__(
|
15
19
|
self,
|
16
20
|
config: ExporterConfig,
|
17
21
|
):
|
18
|
-
super().__init__(
|
19
|
-
config,
|
20
|
-
site="yamibo",
|
21
|
-
chap_folders=["chapters"],
|
22
|
-
)
|
23
|
-
|
24
|
-
|
25
|
-
__all__ = ["YamiboExporter"]
|
22
|
+
super().__init__(config, site="yamibo")
|
@@ -16,17 +16,30 @@ Subpackages:
|
|
16
16
|
- qidian (起点中文网)
|
17
17
|
- sfacg (SF轻小说)
|
18
18
|
- yamibo (百合会)
|
19
|
-
- common (通用架构)
|
20
19
|
"""
|
21
20
|
|
21
|
+
__all__ = [
|
22
|
+
"get_fetcher",
|
23
|
+
"BiqugeBrowser",
|
24
|
+
"BiqugeSession",
|
25
|
+
"EsjzoneBrowser",
|
26
|
+
"EsjzoneSession",
|
27
|
+
"LinovelibBrowser",
|
28
|
+
"LinovelibSession",
|
29
|
+
"QianbiBrowser",
|
30
|
+
"QianbiSession",
|
31
|
+
"QidianBrowser",
|
32
|
+
"QidianSession",
|
33
|
+
"SfacgBrowser",
|
34
|
+
"SfacgSession",
|
35
|
+
"YamiboBrowser",
|
36
|
+
"YamiboSession",
|
37
|
+
]
|
38
|
+
|
22
39
|
from .biquge import (
|
23
40
|
BiqugeBrowser,
|
24
41
|
BiqugeSession,
|
25
42
|
)
|
26
|
-
from .common import (
|
27
|
-
CommonBrowser,
|
28
|
-
CommonSession,
|
29
|
-
)
|
30
43
|
from .esjzone import (
|
31
44
|
EsjzoneBrowser,
|
32
45
|
EsjzoneSession,
|
@@ -43,6 +56,7 @@ from .qidian import (
|
|
43
56
|
QidianBrowser,
|
44
57
|
QidianSession,
|
45
58
|
)
|
59
|
+
from .registry import get_fetcher
|
46
60
|
from .sfacg import (
|
47
61
|
SfacgBrowser,
|
48
62
|
SfacgSession,
|
@@ -51,22 +65,3 @@ from .yamibo import (
|
|
51
65
|
YamiboBrowser,
|
52
66
|
YamiboSession,
|
53
67
|
)
|
54
|
-
|
55
|
-
__all__ = [
|
56
|
-
"BiqugeBrowser",
|
57
|
-
"BiqugeSession",
|
58
|
-
"CommonBrowser",
|
59
|
-
"CommonSession",
|
60
|
-
"EsjzoneBrowser",
|
61
|
-
"EsjzoneSession",
|
62
|
-
"LinovelibBrowser",
|
63
|
-
"LinovelibSession",
|
64
|
-
"QianbiBrowser",
|
65
|
-
"QianbiSession",
|
66
|
-
"QidianBrowser",
|
67
|
-
"QidianSession",
|
68
|
-
"SfacgBrowser",
|
69
|
-
"SfacgSession",
|
70
|
-
"YamiboBrowser",
|
71
|
-
"YamiboSession",
|
72
|
-
]
|
@@ -6,9 +6,11 @@ novel_downloader.core.fetchers.base.browser
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
import abc
|
9
|
+
import asyncio
|
9
10
|
import logging
|
10
11
|
import types
|
11
|
-
from
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import Any, Literal, Self, TypedDict
|
12
14
|
|
13
15
|
from playwright.async_api import (
|
14
16
|
Browser,
|
@@ -21,7 +23,7 @@ from playwright.async_api import (
|
|
21
23
|
)
|
22
24
|
|
23
25
|
from novel_downloader.core.interfaces import FetcherProtocol
|
24
|
-
from novel_downloader.models import FetcherConfig, LoginField
|
26
|
+
from novel_downloader.models import FetcherConfig, LoginField
|
25
27
|
from novel_downloader.utils.constants import (
|
26
28
|
DATA_DIR,
|
27
29
|
DEFAULT_USER_AGENT,
|
@@ -37,6 +39,16 @@ window.chrome = { runtime: {} };
|
|
37
39
|
""".strip()
|
38
40
|
|
39
41
|
|
42
|
+
class NewContextOptions(TypedDict, total=False):
|
43
|
+
user_agent: str
|
44
|
+
locale: str
|
45
|
+
storage_state: Path
|
46
|
+
viewport: ViewportSize
|
47
|
+
java_script_enabled: bool
|
48
|
+
ignore_https_errors: bool
|
49
|
+
extra_http_headers: dict[str, str]
|
50
|
+
|
51
|
+
|
40
52
|
class BaseBrowser(FetcherProtocol, abc.ABC):
|
41
53
|
"""
|
42
54
|
BaseBrowser wraps basic browser operations using playwright
|
@@ -198,12 +210,15 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
|
|
198
210
|
wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
|
199
211
|
| None = "load",
|
200
212
|
referer: str | None = None,
|
213
|
+
delay: float = 0.0,
|
201
214
|
**kwargs: Any,
|
202
215
|
) -> str:
|
203
216
|
if self._reuse_page:
|
204
|
-
return await self._fetch_with_reuse(
|
217
|
+
return await self._fetch_with_reuse(
|
218
|
+
url, wait_until, referer, delay, **kwargs
|
219
|
+
)
|
205
220
|
else:
|
206
|
-
return await self._fetch_with_new(url, wait_until, referer, **kwargs)
|
221
|
+
return await self._fetch_with_new(url, wait_until, referer, delay, **kwargs)
|
207
222
|
|
208
223
|
async def load_state(self) -> bool:
|
209
224
|
""" """
|
@@ -282,11 +297,13 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
|
|
282
297
|
wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
|
283
298
|
| None = "load",
|
284
299
|
referer: str | None = None,
|
300
|
+
delay: float = 0.0,
|
285
301
|
**kwargs: Any,
|
286
302
|
) -> str:
|
287
303
|
page = await self.context.new_page()
|
288
304
|
try:
|
289
305
|
await page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
|
306
|
+
await asyncio.sleep(delay)
|
290
307
|
html: str = await page.content()
|
291
308
|
return html
|
292
309
|
finally:
|
@@ -298,11 +315,13 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
|
|
298
315
|
wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
|
299
316
|
| None = "load",
|
300
317
|
referer: str | None = None,
|
318
|
+
delay: float = 0.0,
|
301
319
|
**kwargs: Any,
|
302
320
|
) -> str:
|
303
321
|
if not self._page:
|
304
322
|
self._page = await self.context.new_page()
|
305
323
|
await self._page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
|
324
|
+
await asyncio.sleep(delay)
|
306
325
|
html: str = await self._page.content()
|
307
326
|
return html
|
308
327
|
|
@@ -21,12 +21,14 @@ from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
|
|
21
21
|
|
22
22
|
from novel_downloader.core.interfaces import FetcherProtocol
|
23
23
|
from novel_downloader.models import FetcherConfig, LoginField
|
24
|
+
from novel_downloader.utils import (
|
25
|
+
async_sleep_with_random_delay,
|
26
|
+
parse_cookie_expires,
|
27
|
+
)
|
24
28
|
from novel_downloader.utils.constants import (
|
25
29
|
DATA_DIR,
|
26
30
|
DEFAULT_USER_HEADERS,
|
27
31
|
)
|
28
|
-
from novel_downloader.utils.cookies import parse_cookie_expires
|
29
|
-
from novel_downloader.utils.time_utils import async_sleep_with_random_delay
|
30
32
|
|
31
33
|
from .rate_limiter import TokenBucketRateLimiter
|
32
34
|
|
@@ -156,7 +158,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
156
158
|
await self._session.close()
|
157
159
|
self._session = None
|
158
160
|
|
159
|
-
async def fetch(
|
161
|
+
async def fetch(
|
162
|
+
self,
|
163
|
+
url: str,
|
164
|
+
encoding: str | None = None,
|
165
|
+
**kwargs: Any,
|
166
|
+
) -> str:
|
160
167
|
"""
|
161
168
|
Fetch the content from the given URL asynchronously, with retry support.
|
162
169
|
|
@@ -172,8 +179,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
172
179
|
try:
|
173
180
|
async with self.session.get(url, **kwargs) as resp:
|
174
181
|
resp.raise_for_status()
|
175
|
-
|
176
|
-
return text
|
182
|
+
return await self._response_to_str(resp, encoding)
|
177
183
|
except aiohttp.ClientError:
|
178
184
|
if attempt < self.retry_times:
|
179
185
|
await async_sleep_with_random_delay(
|
@@ -405,6 +411,25 @@ class BaseSession(FetcherProtocol, abc.ABC):
|
|
405
411
|
return dict(self._session.headers)
|
406
412
|
return self._headers.copy()
|
407
413
|
|
414
|
+
@staticmethod
|
415
|
+
async def _response_to_str(
|
416
|
+
resp: ClientResponse,
|
417
|
+
encoding: str | None = None,
|
418
|
+
) -> str:
|
419
|
+
"""
|
420
|
+
Read the full body of resp as text. First try the declared charset,
|
421
|
+
then on UnicodeDecodeError fall back to a lenient utf-8 decode.
|
422
|
+
"""
|
423
|
+
data: bytes = await resp.read()
|
424
|
+
encodings = [encoding, resp.charset, "utf-8", "gb18030", "gbk"]
|
425
|
+
encodings_list: list[str] = [e for e in encodings if e]
|
426
|
+
for enc in encodings_list:
|
427
|
+
try:
|
428
|
+
return data.decode(enc)
|
429
|
+
except UnicodeDecodeError:
|
430
|
+
continue
|
431
|
+
return data.decode("utf-8", errors="ignore")
|
432
|
+
|
408
433
|
async def __aenter__(self) -> Self:
|
409
434
|
if self._session is None or self._session.closed:
|
410
435
|
await self.init()
|
@@ -5,10 +5,10 @@ novel_downloader.core.fetchers.biquge
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
-
from .browser import BiqugeBrowser
|
9
|
-
from .session import BiqugeSession
|
10
|
-
|
11
8
|
__all__ = [
|
12
9
|
"BiqugeBrowser",
|
13
10
|
"BiqugeSession",
|
14
11
|
]
|
12
|
+
|
13
|
+
from .browser import BiqugeBrowser
|
14
|
+
from .session import BiqugeSession
|