novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/exporters/linovelib/txt.py

@@ -3,22 +3,19 @@
 novel_downloader.core.exporters.linovelib.txt
 ---------------------------------------------
 
-Contains the logic for exporting novel content as a single `.txt` file.
-
-This module defines `linovelib_export_as_txt` function, which assembles and formats
-a novel based on metadata and chapter files found in the raw data directory.
-It is intended to be used by `LinovelibExporter` as part of the save/export process.
+Defines `linovelib_export_as_txt` to assemble and export a Linovelib novel
+into a single `.txt` file. Intended for use by `LinovelibExporter`.
 """
 
 from __future__ import annotations
 
-import json
 from typing import TYPE_CHECKING
 
-from novel_downloader.utils.file_utils import save_as_txt
-from novel_downloader.utils.text_utils import (
-    format_chapter,
+from novel_downloader.core.exporters.txt_util import (
+    build_txt_chapter,
+    build_txt_header,
 )
+from novel_downloader.utils import get_cleaner, save_as_txt
 
 if TYPE_CHECKING:
     from .main_exporter import LinovelibExporter
@@ -29,56 +26,69 @@ def linovelib_export_as_txt(
     book_id: str,
 ) -> None:
     """
-    save_path 文件夹中该小说的所有章节 json 文件合并保存为一个完整的 txt 文件,
-    并保存到 out_path 下
-
-    处理流程:
-    1. book_info.json 中加载书籍信息 (包含书名、作者、简介及卷章节列表)
-    2. 遍历各卷, 每个卷先追加卷标题, 然后依次追加该卷下各章节的标题和内容
-    3. 将书籍元信息 (书名、作者、原文截至、内容简介) 与所有章节内容拼接
-    4. 将最终结果保存到 out_path (例如:`{book_name}.txt`)
-
-    :param book_id: Identifier of the novel (used as subdirectory name).
+    Export a novel as a single text file by merging all chapter data.
+
+    Steps:
+    1. Read metadata from `book_info.json`.
+    2. For each volume:
+        - Clean & append the volume title.
+        - Clean & append optional volume intro.
+        - Batch-fetch all chapters in this volume to minimize SQLite overhead.
+        - For each chapter: clean title & content, then append.
+    3. Build a header block with metadata.
+    4. Concatenate header + all chapter blocks, then save as `{book_name}.txt`.
+
+    :param exporter: The LinovelibExporter instance.
+    :param book_id: Identifier of the novel (subdirectory under raw data).
     """
     TAG = "[exporter]"
     # --- Paths & options ---
-    raw_base = exporter._raw_data_dir / book_id
     out_dir = exporter.output_dir
     out_dir.mkdir(parents=True, exist_ok=True)
+    cleaner = get_cleaner(
+        enabled=exporter._config.clean_text,
+        config=exporter._config.cleaner_cfg,
+    )
 
     # --- Load book_info.json ---
-    info_path = raw_base / "book_info.json"
-    try:
-        info_text = info_path.read_text(encoding="utf-8")
-        book_info = json.loads(info_text)
-    except Exception as e:
-        exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
+    book_info = exporter._load_book_info(book_id)
+    if not book_info:
         return
 
     # --- Compile chapters ---
     parts: list[str] = []
-    volumes = book_info.get("volumes", [])
-
-    for vol in volumes:
-        vol_name = vol.get("volume_name", "").strip()
-        vol_intro = vol.get("volume_intro", "").strip()
-        if vol_name:
-            volume_header = f"\n\n{'=' * 6} {vol_name} {'=' * 6}\n\n"
-            parts.append(volume_header)
-            exporter.logger.info("%s Processing volume: %s", TAG, vol_name)
+
+    for vol in book_info.get("volumes", []):
+        vol_title = cleaner.clean_title(vol.get("volume_name", ""))
+        if vol_title:
+            parts.append(f"\n\n{'=' * 6} {vol_title} {'=' * 6}\n\n")
+            exporter.logger.info("%s Processing volume: %s", TAG, vol_title)
+
+        vol_intro = cleaner.clean_content(vol.get("volume_intro", ""))
         if vol_intro:
             parts.append(f"{vol_intro}\n\n")
-        for chap in vol.get("chapters", []):
-            chap_id = chap.get("chapterId")
-            chap_title = chap.get("title", "")
+
+        # Batch-fetch chapters for this volume
+        chap_ids = [
+            chap.get("chapterId")
+            for chap in vol.get("chapters", [])
+            if chap.get("chapterId")
+        ]
+        chap_map = exporter._get_chapters(book_id, chap_ids)
+
+        for chap_meta in vol.get("chapters", []):
+            chap_id = chap_meta.get("chapterId")
            if not chap_id:
-                exporter.logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
+                exporter.logger.warning(
+                    "%s Missing chapterId, skipping: %s", TAG, chap_meta
+                )
                continue
 
-            chapter_data = exporter._get_chapter(book_id, chap_id)
-            if not chapter_data:
+            chap_title = cleaner.clean_title(chap_meta.get("title", ""))
+            data = chap_map.get(chap_id)
+            if not data:
                exporter.logger.info(
-                    "%s Missing chapter file in: %s (%s), skipping.",
+                    "%s Missing chapter: %s (%s), skipping.",
                    TAG,
                    chap_title,
                    chap_id,
@@ -86,33 +96,27 @@ def linovelib_export_as_txt(
                continue
 
            # Extract structured fields
-            title = chapter_data.get("title", chap_title).strip()
-            content = chapter_data.get("content", "").strip()
+            title = cleaner.clean_title(data.get("title", chap_title))
+            content = cleaner.clean_content(data.get("content", ""))
 
-            parts.append(format_chapter(title, content, ""))
+            parts.append(build_txt_chapter(title=title, paragraphs=content, extras={}))
 
     # --- Build header ---
-    name = book_info.get("book_name")
-    author = book_info.get("author")
-    words = book_info.get("word_count")
-    updated = book_info.get("update_time")
-    summary = book_info.get("summary")
+    name = book_info.get("book_name") or ""
+    author = book_info.get("author") or ""
+    words = book_info.get("word_count") or ""
+    updated = book_info.get("update_time") or ""
+    summary = book_info.get("summary") or ""
 
-    fields = [
+    header_fields = [
        ("书名", name),
        ("作者", author),
        ("总字数", words),
        ("更新日期", updated),
+        ("内容简介", summary),
    ]
-    header_lines = [f"{label}: {value}" for label, value in fields if value]
-
-    if summary:
-        header_lines.append("内容简介:")
-        header_lines.append(summary)
-
-    header_lines += ["", "-" * 10, ""]
 
-    header = "\n".join(header_lines)
+    header = build_txt_header(header_fields)
 
     final_text = header + "\n\n" + "\n\n".join(parts).strip()
 
@@ -121,9 +125,9 @@ def linovelib_export_as_txt(
     out_path = out_dir / out_name
 
     # --- Save final text ---
-    try:
-        save_as_txt(content=final_text, filepath=out_path)
+    result = save_as_txt(content=final_text, filepath=out_path)
+    if result:
         exporter.logger.info("%s Novel saved to: %s", TAG, out_path)
-    except Exception as e:
-        exporter.logger.error("%s Failed to save file: %s", TAG, e)
+    else:
+        exporter.logger.error("%s Failed to write novel to %s", TAG, out_path)
     return

novel_downloader/core/exporters/qianbi.py

@@ -5,21 +5,18 @@ novel_downloader.core.exporters.qianbi
 
 """
 
+__all__ = ["QianbiExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["qianbi"])
 class QianbiExporter(CommonExporter):
     def __init__(
         self,
         config: ExporterConfig,
     ):
-        super().__init__(
-            config,
-            site="qianbi",
-            chap_folders=["chapters"],
-        )
-
-
-__all__ = ["QianbiExporter"]
+        super().__init__(config, site="qianbi")

novel_downloader/core/exporters/qidian.py

@@ -8,12 +8,25 @@ of novels sourced from Qidian (起点中文网). It implements the platform-spec
 logic required to structure and export novel content into desired formats.
 """
 
+__all__ = ["QidianExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["qidian", "qd"])
 class QidianExporter(CommonExporter):
+    """ """
+
+    DEFAULT_SOURCE_ID = 0
+    ENCRYPTED_SOURCE_ID = 1
+    PRIORITIES_MAP = {
+        DEFAULT_SOURCE_ID: 0,
+        ENCRYPTED_SOURCE_ID: 1,
+    }
+
     def __init__(
         self,
         config: ExporterConfig,
@@ -21,8 +34,5 @@ class QidianExporter(CommonExporter):
         super().__init__(
             config,
             site="qidian",
-            chap_folders=["chapters", "encrypted_chapters"],
+            priorities=self.PRIORITIES_MAP,
         )
-
-
-__all__ = ["QidianExporter"]

novel_downloader/core/exporters/registry.py (new file)

@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.exporters.registry
+----------------------------------------
+
+"""
+
+__all__ = ["register_exporter", "get_exporter"]
+
+from collections.abc import Callable, Sequence
+from typing import TypeVar
+
+from novel_downloader.core.interfaces import ExporterProtocol
+from novel_downloader.models import ExporterConfig
+
+ExporterBuilder = Callable[[ExporterConfig], ExporterProtocol]
+
+E = TypeVar("E", bound=ExporterProtocol)
+_EXPORTER_MAP: dict[str, ExporterBuilder] = {}
+
+
+def register_exporter(
+    site_keys: Sequence[str],
+) -> Callable[[type[E]], type[E]]:
+    """
+    Decorator to register a exporter class under given keys.
+
+    :param site_keys: Sequence of site identifiers
+    :return: A class decorator that populates _EXPORTER_MAP.
+    """
+
+    def decorator(cls: type[E]) -> type[E]:
+        for key in site_keys:
+            _EXPORTER_MAP[key.lower()] = cls
+        return cls
+
+    return decorator
+
+
+def get_exporter(site: str, config: ExporterConfig) -> ExporterProtocol:
+    """
+    Returns a site-specific exporter instance.
+
+    :param site: Site name (e.g., 'qidian')
+    :param config: Configuration for the exporter
+    :return: An instance of a exporter class
+    """
+    site_key = site.lower()
+    try:
+        exporter_cls = _EXPORTER_MAP[site_key]
+    except KeyError as err:
+        raise ValueError(f"Unsupported site: {site}") from err
+    return exporter_cls(config)

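For orientation, a minimal usage sketch of the new registry (not part of the released code). The import paths and the lookup/error behaviour come from the diff above; that ExporterConfig() accepts no arguments is an assumption, and a site is only registered once its module has been imported so the decorator runs.

    # Hypothetical usage sketch; assumes ExporterConfig() has workable defaults.
    from novel_downloader.core.exporters.qidian import QidianExporter  # noqa: F401  # decorator registers "qidian"/"qd"
    from novel_downloader.core.exporters.registry import get_exporter
    from novel_downloader.models import ExporterConfig

    exporter = get_exporter("QD", ExporterConfig())  # keys are lowercased, so lookup is case-insensitive
    # get_exporter("some-unknown-site", ...) raises ValueError("Unsupported site: ...")
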
novel_downloader/core/exporters/sfacg.py

@@ -5,21 +5,18 @@ novel_downloader.core.exporters.sfacg
 
 """
 
+__all__ = ["SfacgExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["sfacg"])
 class SfacgExporter(CommonExporter):
     def __init__(
         self,
         config: ExporterConfig,
     ):
-        super().__init__(
-            config,
-            site="sfacg",
-            chap_folders=["chapters"],
-        )
-
-
-__all__ = ["SfacgExporter"]
+        super().__init__(config, site="sfacg")

novel_downloader/core/exporters/txt_util.py (new file)

@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.exporters.txt_util
+----------------------------------------
+
+Utilities for generating plain-text exports of novel content.
+"""
+
+__all__ = [
+    "build_txt_header",
+    "build_txt_chapter",
+]
+
+import re
+
+_IMG_TAG_RE = re.compile(r"<img[^>]*>")
+
+
+def build_txt_header(fields: list[tuple[str, str]]) -> str:
+    """
+    Build a simple text header from label-value pairs, followed by a dashed separator.
+
+    :param fields: List of (label, value) pairs.
+    :return: A single string containing the formatted header.
+    """
+    header_lines = [f"{label}: {value}" for label, value in fields if value]
+    header_lines += ["", "-" * 10, ""]
+    return "\n".join(header_lines)
+
+
+def build_txt_chapter(
+    title: str,
+    paragraphs: str,
+    extras: dict[str, str] | None = None,
+) -> str:
+    """
+    Build a formatted chapter text block including title, body paragraphs,
+    and optional extra sections.
+
+    - Strips any `<img...>` tags from paragraphs.
+    - Title appears first (stripped of surrounding whitespace).
+    - Each non-blank line in `paragraphs` becomes its own paragraph.
+
+    :param title: Chapter title.
+    :param paragraphs: Raw multi-line string. Blank lines are ignored.
+    :param extras: Optional dict mapping section titles to multi-line strings.
+    :return: A string where title, paragraphs, and extras are joined by lines.
+    """
+    parts: list[str] = [title.strip()]
+
+    # add each nonempty paragraph line
+    paragraphs = _IMG_TAG_RE.sub("", paragraphs)
+    for ln in paragraphs.splitlines():
+        line = ln.strip()
+        if line:
+            parts.append(line)
+
+    if extras:
+        for title, text in extras.items():
+            lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
+            if not lines:
+                continue
+            parts.append("---")
+            parts.append(title.strip())
+            parts.extend(lines)
+
+    return "\n\n".join(parts)

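Both helpers are pure string functions, so their behaviour is easy to see in isolation; a short sketch with made-up values (not part of the package):

    from novel_downloader.core.exporters.txt_util import build_txt_chapter, build_txt_header

    header = build_txt_header([("书名", "示例小说"), ("作者", "某人"), ("总字数", "")])
    # Empty values are dropped, then "", "-" * 10, "" close the block:
    # 书名: 示例小说
    # 作者: 某人
    #
    # ----------

    chapter = build_txt_chapter(
        title="第一章 开端",
        paragraphs='第一段。\n\n<img src="a.png"/>第二段。',  # the <img ...> tag is stripped
        extras={"作者的话": "感谢阅读。"},
    )
    # -> title, each non-blank paragraph line, then "---", the extra's title and lines,
    #    all joined by blank lines ("\n\n").
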
novel_downloader/core/exporters/yamibo.py

@@ -5,21 +5,18 @@ novel_downloader.core.exporters.yamibo
 
 """
 
+__all__ = ["YamiboExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["yamibo"])
 class YamiboExporter(CommonExporter):
     def __init__(
         self,
         config: ExporterConfig,
     ):
-        super().__init__(
-            config,
-            site="yamibo",
-            chap_folders=["chapters"],
-        )
-
-
-__all__ = ["YamiboExporter"]
+        super().__init__(config, site="yamibo")

novel_downloader/core/fetchers/__init__.py

@@ -16,17 +16,30 @@ Subpackages:
 - qidian (起点中文网)
 - sfacg (SF轻小说)
 - yamibo (百合会)
-- common (通用架构)
 """
 
+__all__ = [
+    "get_fetcher",
+    "BiqugeBrowser",
+    "BiqugeSession",
+    "EsjzoneBrowser",
+    "EsjzoneSession",
+    "LinovelibBrowser",
+    "LinovelibSession",
+    "QianbiBrowser",
+    "QianbiSession",
+    "QidianBrowser",
+    "QidianSession",
+    "SfacgBrowser",
+    "SfacgSession",
+    "YamiboBrowser",
+    "YamiboSession",
+]
+
 from .biquge import (
     BiqugeBrowser,
     BiqugeSession,
 )
-from .common import (
-    CommonBrowser,
-    CommonSession,
-)
 from .esjzone import (
     EsjzoneBrowser,
     EsjzoneSession,
@@ -43,6 +56,7 @@ from .qidian import (
     QidianBrowser,
     QidianSession,
 )
+from .registry import get_fetcher
 from .sfacg import (
     SfacgBrowser,
     SfacgSession,
@@ -51,22 +65,3 @@ from .yamibo import (
     YamiboBrowser,
     YamiboSession,
 )
-
-__all__ = [
-    "BiqugeBrowser",
-    "BiqugeSession",
-    "CommonBrowser",
-    "CommonSession",
-    "EsjzoneBrowser",
-    "EsjzoneSession",
-    "LinovelibBrowser",
-    "LinovelibSession",
-    "QianbiBrowser",
-    "QianbiSession",
-    "QidianBrowser",
-    "QidianSession",
-    "SfacgBrowser",
-    "SfacgSession",
-    "YamiboBrowser",
-    "YamiboSession",
-]

novel_downloader/core/fetchers/base/__init__.py

@@ -5,10 +5,10 @@ novel_downloader.core.fetchers.base
 
 """
 
-from .browser import BaseBrowser
-from .session import BaseSession
-
 __all__ = [
     "BaseBrowser",
     "BaseSession",
 ]
+
+from .browser import BaseBrowser
+from .session import BaseSession

novel_downloader/core/fetchers/base/browser.py

@@ -6,9 +6,11 @@ novel_downloader.core.fetchers.base.browser
 """
 
 import abc
+import asyncio
 import logging
 import types
-from typing import Any, Literal, Self
+from pathlib import Path
+from typing import Any, Literal, Self, TypedDict
 
 from playwright.async_api import (
     Browser,
@@ -21,7 +23,7 @@ from playwright.async_api import (
 )
 
 from novel_downloader.core.interfaces import FetcherProtocol
-from novel_downloader.models import FetcherConfig, LoginField, NewContextOptions
+from novel_downloader.models import FetcherConfig, LoginField
 from novel_downloader.utils.constants import (
     DATA_DIR,
     DEFAULT_USER_AGENT,
@@ -37,6 +39,16 @@ window.chrome = { runtime: {} };
 """.strip()
 
 
+class NewContextOptions(TypedDict, total=False):
+    user_agent: str
+    locale: str
+    storage_state: Path
+    viewport: ViewportSize
+    java_script_enabled: bool
+    ignore_https_errors: bool
+    extra_http_headers: dict[str, str]
+
+
 class BaseBrowser(FetcherProtocol, abc.ABC):
     """
     BaseBrowser wraps basic browser operations using playwright
@@ -198,12 +210,15 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
+        delay: float = 0.0,
        **kwargs: Any,
    ) -> str:
        if self._reuse_page:
-            return await self._fetch_with_reuse(url, wait_until, referer, **kwargs)
+            return await self._fetch_with_reuse(
+                url, wait_until, referer, delay, **kwargs
+            )
        else:
-            return await self._fetch_with_new(url, wait_until, referer, **kwargs)
+            return await self._fetch_with_new(url, wait_until, referer, delay, **kwargs)
 
    async def load_state(self) -> bool:
        """ """
@@ -282,11 +297,13 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
+        delay: float = 0.0,
        **kwargs: Any,
    ) -> str:
        page = await self.context.new_page()
        try:
            await page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
+            await asyncio.sleep(delay)
            html: str = await page.content()
            return html
        finally:
@@ -298,11 +315,13 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
+        delay: float = 0.0,
        **kwargs: Any,
    ) -> str:
        if not self._page:
            self._page = await self.context.new_page()
        await self._page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
+        await asyncio.sleep(delay)
        html: str = await self._page.content()
        return html
 
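The practical effect of the new delay parameter, as a hedged sketch (not part of the package); `fetcher` stands for any already-initialized BaseBrowser subclass such as the site browsers listed above, and is not constructed here.

    from novel_downloader.core.fetchers.base import BaseBrowser

    async def fetch_rendered(fetcher: BaseBrowser, url: str) -> str:
        # delay=1.0 inserts `await asyncio.sleep(1.0)` between page.goto() and
        # page.content(), giving client-side rendering a moment to settle.
        return await fetcher.fetch(url, wait_until="networkidle", delay=1.0)
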
novel_downloader/core/fetchers/base/session.py

@@ -21,12 +21,14 @@ from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
 
 from novel_downloader.core.interfaces import FetcherProtocol
 from novel_downloader.models import FetcherConfig, LoginField
+from novel_downloader.utils import (
+    async_sleep_with_random_delay,
+    parse_cookie_expires,
+)
 from novel_downloader.utils.constants import (
     DATA_DIR,
     DEFAULT_USER_HEADERS,
 )
-from novel_downloader.utils.cookies import parse_cookie_expires
-from novel_downloader.utils.time_utils import async_sleep_with_random_delay
 
 from .rate_limiter import TokenBucketRateLimiter
 
@@ -156,7 +158,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
        await self._session.close()
        self._session = None
 
-    async def fetch(self, url: str, **kwargs: Any) -> str:
+    async def fetch(
+        self,
+        url: str,
+        encoding: str | None = None,
+        **kwargs: Any,
+    ) -> str:
        """
        Fetch the content from the given URL asynchronously, with retry support.
 
@@ -172,8 +179,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
            try:
                async with self.session.get(url, **kwargs) as resp:
                    resp.raise_for_status()
-                    text: str = await resp.text()
-                    return text
+                    return await self._response_to_str(resp, encoding)
            except aiohttp.ClientError:
                if attempt < self.retry_times:
                    await async_sleep_with_random_delay(
@@ -405,6 +411,25 @@ class BaseSession(FetcherProtocol, abc.ABC):
            return dict(self._session.headers)
        return self._headers.copy()
 
+    @staticmethod
+    async def _response_to_str(
+        resp: ClientResponse,
+        encoding: str | None = None,
+    ) -> str:
+        """
+        Read the full body of resp as text. First try the declared charset,
+        then on UnicodeDecodeError fall back to a lenient utf-8 decode.
+        """
+        data: bytes = await resp.read()
+        encodings = [encoding, resp.charset, "utf-8", "gb18030", "gbk"]
+        encodings_list: list[str] = [e for e in encodings if e]
+        for enc in encodings_list:
+            try:
+                return data.decode(enc)
+            except UnicodeDecodeError:
+                continue
+        return data.decode("utf-8", errors="ignore")
+
    async def __aenter__(self) -> Self:
        if self._session is None or self._session.closed:
            await self.init()
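The decode chain in `_response_to_str` can be read in isolation; a standalone sketch of the same fallback logic (the sample bytes and the helper name are made up for illustration):

    def decode_fallback(data: bytes, declared: str | None = None) -> str:
        # Try the caller-supplied encoding first, then utf-8 / gb18030 / gbk,
        # and finally a lossy utf-8 decode, mirroring the method above.
        for enc in [e for e in (declared, "utf-8", "gb18030", "gbk") if e]:
            try:
                return data.decode(enc)
            except UnicodeDecodeError:
                continue
        return data.decode("utf-8", errors="ignore")

    print(decode_fallback("简体中文".encode("gb18030")))  # utf-8 fails, gb18030 succeeds
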
novel_downloader/core/fetchers/biquge/__init__.py

@@ -5,10 +5,10 @@ novel_downloader.core.fetchers.biquge
 
 """
 
-from .browser import BiqugeBrowser
-from .session import BiqugeSession
-
 __all__ = [
     "BiqugeBrowser",
     "BiqugeSession",
 ]
+
+from .browser import BiqugeBrowser
+from .session import BiqugeSession