novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/exporters/linovelib/txt.py

@@ -3,22 +3,19 @@
 novel_downloader.core.exporters.linovelib.txt
 ---------------------------------------------
 
-Contains the logic for exporting novel content as a single `.txt` file.
-
-This module defines `linovelib_export_as_txt` function, which assembles and formats
-a novel based on metadata and chapter files found in the raw data directory.
-It is intended to be used by `LinovelibExporter` as part of the save/export process.
+Defines `linovelib_export_as_txt` to assemble and export a Linovelib novel
+into a single `.txt` file. Intended for use by `LinovelibExporter`.
 """
 
 from __future__ import annotations
 
-import json
 from typing import TYPE_CHECKING
 
-from novel_downloader.utils.file_utils import save_as_txt
-from novel_downloader.utils.text_utils import (
-    format_chapter,
+from novel_downloader.core.exporters.txt_util import (
+    build_txt_chapter,
+    build_txt_header,
 )
+from novel_downloader.utils import get_cleaner, save_as_txt
 
 if TYPE_CHECKING:
     from .main_exporter import LinovelibExporter
@@ -29,56 +26,69 @@ def linovelib_export_as_txt(
     book_id: str,
 ) -> None:
     """
-    save_path 文件夹中该小说的所有章节 json 文件合并保存为一个完整的 txt 文件,
-    并保存到 out_path 下
-
-    处理流程:
-    1. book_info.json 中加载书籍信息 (包含书名、作者、简介及卷章节列表)
-    2. 遍历各卷, 每个卷先追加卷标题, 然后依次追加该卷下各章节的标题和内容
-    3. 将书籍元信息 (书名、作者、原文截至、内容简介) 与所有章节内容拼接
-    4. 将最终结果保存到 out_path (例如:`{book_name}.txt`)
-
-    :param book_id: Identifier of the novel (used as subdirectory name).
+    Export a novel as a single text file by merging all chapter data.
+
+    Steps:
+    1. Read metadata from `book_info.json`.
+    2. For each volume:
+        - Clean & append the volume title.
+        - Clean & append optional volume intro.
+        - Batch-fetch all chapters in this volume to minimize SQLite overhead.
+        - For each chapter: clean title & content, then append.
+    3. Build a header block with metadata.
+    4. Concatenate header + all chapter blocks, then save as `{book_name}.txt`.
+
+    :param exporter: The LinovelibExporter instance.
+    :param book_id: Identifier of the novel (subdirectory under raw data).
     """
     TAG = "[exporter]"
     # --- Paths & options ---
-    raw_base = exporter._raw_data_dir / book_id
     out_dir = exporter.output_dir
     out_dir.mkdir(parents=True, exist_ok=True)
+    cleaner = get_cleaner(
+        enabled=exporter._config.clean_text,
+        config=exporter._config.cleaner_cfg,
+    )
 
     # --- Load book_info.json ---
-    info_path = raw_base / "book_info.json"
-    try:
-        info_text = info_path.read_text(encoding="utf-8")
-        book_info = json.loads(info_text)
-    except Exception as e:
-        exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
+    book_info = exporter._load_book_info(book_id)
+    if not book_info:
         return
 
     # --- Compile chapters ---
     parts: list[str] = []
-    volumes = book_info.get("volumes", [])
-
-    for vol in volumes:
-        vol_name = vol.get("volume_name", "").strip()
-        vol_intro = vol.get("volume_intro", "").strip()
-        if vol_name:
-            volume_header = f"\n\n{'=' * 6} {vol_name} {'=' * 6}\n\n"
-            parts.append(volume_header)
-            exporter.logger.info("%s Processing volume: %s", TAG, vol_name)
+
+    for vol in book_info.get("volumes", []):
+        vol_title = cleaner.clean_title(vol.get("volume_name", ""))
+        if vol_title:
+            parts.append(f"\n\n{'=' * 6} {vol_title} {'=' * 6}\n\n")
+            exporter.logger.info("%s Processing volume: %s", TAG, vol_title)
+
+        vol_intro = cleaner.clean_content(vol.get("volume_intro", ""))
         if vol_intro:
             parts.append(f"{vol_intro}\n\n")
-        for chap in vol.get("chapters", []):
-            chap_id = chap.get("chapterId")
-            chap_title = chap.get("title", "")
+
+        # Batch-fetch chapters for this volume
+        chap_ids = [
+            chap.get("chapterId")
+            for chap in vol.get("chapters", [])
+            if chap.get("chapterId")
+        ]
+        chap_map = exporter._get_chapters(book_id, chap_ids)
+
+        for chap_meta in vol.get("chapters", []):
+            chap_id = chap_meta.get("chapterId")
            if not chap_id:
-                exporter.logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
+                exporter.logger.warning(
+                    "%s Missing chapterId, skipping: %s", TAG, chap_meta
+                )
                continue
 
-            chapter_data = exporter._get_chapter(book_id, chap_id)
-            if not chapter_data:
+            chap_title = cleaner.clean_title(chap_meta.get("title", ""))
+            data = chap_map.get(chap_id)
+            if not data:
                exporter.logger.info(
-                    "%s Missing chapter file in: %s (%s), skipping.",
+                    "%s Missing chapter: %s (%s), skipping.",
                    TAG,
                    chap_title,
                    chap_id,
@@ -86,33 +96,27 @@ def linovelib_export_as_txt(
                continue
 
            # Extract structured fields
-            title = chapter_data.get("title", chap_title).strip()
-            content = chapter_data.get("content", "").strip()
+            title = cleaner.clean_title(data.get("title", chap_title))
+            content = cleaner.clean_content(data.get("content", ""))
 
-            parts.append(format_chapter(title, content, ""))
+            parts.append(build_txt_chapter(title=title, paragraphs=content, extras={}))
 
     # --- Build header ---
-    name = book_info.get("book_name")
-    author = book_info.get("author")
-    words = book_info.get("word_count")
-    updated = book_info.get("update_time")
-    summary = book_info.get("summary")
+    name = book_info.get("book_name") or ""
+    author = book_info.get("author") or ""
+    words = book_info.get("word_count") or ""
+    updated = book_info.get("update_time") or ""
+    summary = book_info.get("summary") or ""
 
-    fields = [
+    header_fields = [
        ("书名", name),
        ("作者", author),
        ("总字数", words),
        ("更新日期", updated),
+        ("内容简介", summary),
    ]
-    header_lines = [f"{label}: {value}" for label, value in fields if value]
-
-    if summary:
-        header_lines.append("内容简介:")
-        header_lines.append(summary)
-
-    header_lines += ["", "-" * 10, ""]
 
-    header = "\n".join(header_lines)
+    header = build_txt_header(header_fields)
 
     final_text = header + "\n\n" + "\n\n".join(parts).strip()
 
@@ -121,9 +125,9 @@ def linovelib_export_as_txt(
     out_path = out_dir / out_name
 
     # --- Save final text ---
-    try:
-        save_as_txt(content=final_text, filepath=out_path)
+    result = save_as_txt(content=final_text, filepath=out_path)
+    if result:
         exporter.logger.info("%s Novel saved to: %s", TAG, out_path)
-    except Exception as e:
-        exporter.logger.error("%s Failed to save file: %s", TAG, e)
+    else:
+        exporter.logger.error("%s Failed to write novel to %s", TAG, out_path)
     return

novel_downloader/core/exporters/qianbi.py

@@ -5,21 +5,18 @@ novel_downloader.core.exporters.qianbi
 
 """
 
+__all__ = ["QianbiExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["qianbi"])
 class QianbiExporter(CommonExporter):
     def __init__(
         self,
         config: ExporterConfig,
     ):
-        super().__init__(
-            config,
-            site="qianbi",
-            chap_folders=["chapters"],
-        )
-
-
-__all__ = ["QianbiExporter"]
+        super().__init__(config, site="qianbi")

novel_downloader/core/exporters/qidian.py

@@ -8,12 +8,25 @@ of novels sourced from Qidian (起点中文网). It implements the platform-spec
 logic required to structure and export novel content into desired formats.
 """
 
+__all__ = ["QidianExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["qidian", "qd"])
 class QidianExporter(CommonExporter):
+    """ """
+
+    DEFAULT_SOURCE_ID = 0
+    ENCRYPTED_SOURCE_ID = 1
+    PRIORITIES_MAP = {
+        DEFAULT_SOURCE_ID: 0,
+        ENCRYPTED_SOURCE_ID: 1,
+    }
+
     def __init__(
         self,
         config: ExporterConfig,
@@ -21,8 +34,5 @@ class QidianExporter(CommonExporter):
         super().__init__(
             config,
             site="qidian",
-            chap_folders=["chapters", "encrypted_chapters"],
+            priorities=self.PRIORITIES_MAP,
         )
-
-
-__all__ = ["QidianExporter"]

novel_downloader/core/exporters/registry.py (new file)

@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.exporters.registry
+----------------------------------------
+
+"""
+
+__all__ = ["register_exporter", "get_exporter"]
+
+from collections.abc import Callable, Sequence
+from typing import TypeVar
+
+from novel_downloader.core.interfaces import ExporterProtocol
+from novel_downloader.models import ExporterConfig
+
+ExporterBuilder = Callable[[ExporterConfig], ExporterProtocol]
+
+E = TypeVar("E", bound=ExporterProtocol)
+_EXPORTER_MAP: dict[str, ExporterBuilder] = {}
+
+
+def register_exporter(
+    site_keys: Sequence[str],
+) -> Callable[[type[E]], type[E]]:
+    """
+    Decorator to register a exporter class under given keys.
+
+    :param site_keys: Sequence of site identifiers
+    :return: A class decorator that populates _EXPORTER_MAP.
+    """
+
+    def decorator(cls: type[E]) -> type[E]:
+        for key in site_keys:
+            _EXPORTER_MAP[key.lower()] = cls
+        return cls
+
+    return decorator
+
+
+def get_exporter(site: str, config: ExporterConfig) -> ExporterProtocol:
+    """
+    Returns a site-specific exporter instance.
+
+    :param site: Site name (e.g., 'qidian')
+    :param config: Configuration for the exporter
+    :return: An instance of a exporter class
+    """
+    site_key = site.lower()
+    try:
+        exporter_cls = _EXPORTER_MAP[site_key]
+    except KeyError as err:
+        raise ValueError(f"Unsupported site: {site}") from err
+    return exporter_cls(config)

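For orientation, a minimal usage sketch of the new registry (not part of the released code). The import paths and the lookup/error behaviour come from the diff above; that ExporterConfig() accepts no arguments is an assumption, and a site is only registered once its module has been imported so the decorator runs.

    # Hypothetical usage sketch; assumes ExporterConfig() has workable defaults.
    from novel_downloader.core.exporters.qidian import QidianExporter  # noqa: F401  # decorator registers "qidian"/"qd"
    from novel_downloader.core.exporters.registry import get_exporter
    from novel_downloader.models import ExporterConfig

    exporter = get_exporter("QD", ExporterConfig())  # keys are lowercased, so lookup is case-insensitive
    # get_exporter("some-unknown-site", ...) raises ValueError("Unsupported site: ...")
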
novel_downloader/core/exporters/sfacg.py

@@ -5,21 +5,18 @@ novel_downloader.core.exporters.sfacg
 
 """
 
+__all__ = ["SfacgExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["sfacg"])
 class SfacgExporter(CommonExporter):
     def __init__(
         self,
         config: ExporterConfig,
     ):
-        super().__init__(
-            config,
-            site="sfacg",
-            chap_folders=["chapters"],
-        )
-
-
-__all__ = ["SfacgExporter"]
+        super().__init__(config, site="sfacg")

novel_downloader/core/exporters/txt_util.py (new file)

@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.exporters.txt_util
+----------------------------------------
+
+Utilities for generating plain-text exports of novel content.
+"""
+
+__all__ = [
+    "build_txt_header",
+    "build_txt_chapter",
+]
+
+import re
+
+_IMG_TAG_RE = re.compile(r"<img[^>]*>")
+
+
+def build_txt_header(fields: list[tuple[str, str]]) -> str:
+    """
+    Build a simple text header from label-value pairs, followed by a dashed separator.
+
+    :param fields: List of (label, value) pairs.
+    :return: A single string containing the formatted header.
+    """
+    header_lines = [f"{label}: {value}" for label, value in fields if value]
+    header_lines += ["", "-" * 10, ""]
+    return "\n".join(header_lines)
+
+
+def build_txt_chapter(
+    title: str,
+    paragraphs: str,
+    extras: dict[str, str] | None = None,
+) -> str:
+    """
+    Build a formatted chapter text block including title, body paragraphs,
+    and optional extra sections.
+
+    - Strips any `<img...>` tags from paragraphs.
+    - Title appears first (stripped of surrounding whitespace).
+    - Each non-blank line in `paragraphs` becomes its own paragraph.
+
+    :param title: Chapter title.
+    :param paragraphs: Raw multi-line string. Blank lines are ignored.
+    :param extras: Optional dict mapping section titles to multi-line strings.
+    :return: A string where title, paragraphs, and extras are joined by lines.
+    """
+    parts: list[str] = [title.strip()]
+
+    # add each nonempty paragraph line
+    paragraphs = _IMG_TAG_RE.sub("", paragraphs)
+    for ln in paragraphs.splitlines():
+        line = ln.strip()
+        if line:
+            parts.append(line)
+
+    if extras:
+        for title, text in extras.items():
+            lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
+            if not lines:
+                continue
+            parts.append("---")
+            parts.append(title.strip())
+            parts.extend(lines)
+
+    return "\n\n".join(parts)

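Both helpers are pure string functions, so their behaviour is easy to see in isolation; a short sketch with made-up values (not part of the package):

    from novel_downloader.core.exporters.txt_util import build_txt_chapter, build_txt_header

    header = build_txt_header([("书名", "示例小说"), ("作者", "某人"), ("总字数", "")])
    # Empty values are dropped, then "", "-" * 10, "" close the block:
    # 书名: 示例小说
    # 作者: 某人
    #
    # ----------

    chapter = build_txt_chapter(
        title="第一章 开端",
        paragraphs='第一段。\n\n<img src="a.png"/>第二段。',  # the <img ...> tag is stripped
        extras={"作者的话": "感谢阅读。"},
    )
    # -> title, each non-blank paragraph line, then "---", the extra's title and lines,
    #    all joined by blank lines ("\n\n").
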
novel_downloader/core/exporters/yamibo.py

@@ -5,21 +5,18 @@ novel_downloader.core.exporters.yamibo
 
 """
 
+__all__ = ["YamiboExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["yamibo"])
 class YamiboExporter(CommonExporter):
     def __init__(
         self,
         config: ExporterConfig,
     ):
-        super().__init__(
-            config,
-            site="yamibo",
-            chap_folders=["chapters"],
-        )
-
-
-__all__ = ["YamiboExporter"]
+        super().__init__(config, site="yamibo")

novel_downloader/core/fetchers/__init__.py

@@ -16,17 +16,30 @@ Subpackages:
 - qidian (起点中文网)
 - sfacg (SF轻小说)
 - yamibo (百合会)
-- common (通用架构)
 """
 
+__all__ = [
+    "get_fetcher",
+    "BiqugeBrowser",
+    "BiqugeSession",
+    "EsjzoneBrowser",
+    "EsjzoneSession",
+    "LinovelibBrowser",
+    "LinovelibSession",
+    "QianbiBrowser",
+    "QianbiSession",
+    "QidianBrowser",
+    "QidianSession",
+    "SfacgBrowser",
+    "SfacgSession",
+    "YamiboBrowser",
+    "YamiboSession",
+]
+
 from .biquge import (
     BiqugeBrowser,
     BiqugeSession,
 )
-from .common import (
-    CommonBrowser,
-    CommonSession,
-)
 from .esjzone import (
     EsjzoneBrowser,
     EsjzoneSession,
@@ -43,6 +56,7 @@ from .qidian import (
     QidianBrowser,
     QidianSession,
 )
+from .registry import get_fetcher
 from .sfacg import (
     SfacgBrowser,
     SfacgSession,
@@ -51,22 +65,3 @@ from .yamibo import (
     YamiboBrowser,
     YamiboSession,
 )
-
-__all__ = [
-    "BiqugeBrowser",
-    "BiqugeSession",
-    "CommonBrowser",
-    "CommonSession",
-    "EsjzoneBrowser",
-    "EsjzoneSession",
-    "LinovelibBrowser",
-    "LinovelibSession",
-    "QianbiBrowser",
-    "QianbiSession",
-    "QidianBrowser",
-    "QidianSession",
-    "SfacgBrowser",
-    "SfacgSession",
-    "YamiboBrowser",
-    "YamiboSession",
-]

novel_downloader/core/fetchers/base/__init__.py

@@ -5,10 +5,10 @@ novel_downloader.core.fetchers.base
 
 """
 
-from .browser import BaseBrowser
-from .session import BaseSession
-
 __all__ = [
     "BaseBrowser",
     "BaseSession",
 ]
+
+from .browser import BaseBrowser
+from .session import BaseSession

novel_downloader/core/fetchers/base/browser.py

@@ -6,9 +6,11 @@ novel_downloader.core.fetchers.base.browser
 """
 
 import abc
+import asyncio
 import logging
 import types
-from typing import Any, Literal, Self
+from pathlib import Path
+from typing import Any, Literal, Self, TypedDict
 
 from playwright.async_api import (
     Browser,
@@ -21,7 +23,7 @@ from playwright.async_api import (
 )
 
 from novel_downloader.core.interfaces import FetcherProtocol
-from novel_downloader.models import FetcherConfig, LoginField, NewContextOptions
+from novel_downloader.models import FetcherConfig, LoginField
 from novel_downloader.utils.constants import (
     DATA_DIR,
     DEFAULT_USER_AGENT,
@@ -37,6 +39,16 @@ window.chrome = { runtime: {} };
 """.strip()
 
 
+class NewContextOptions(TypedDict, total=False):
+    user_agent: str
+    locale: str
+    storage_state: Path
+    viewport: ViewportSize
+    java_script_enabled: bool
+    ignore_https_errors: bool
+    extra_http_headers: dict[str, str]
+
+
 class BaseBrowser(FetcherProtocol, abc.ABC):
     """
     BaseBrowser wraps basic browser operations using playwright
@@ -198,12 +210,15 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
+        delay: float = 0.0,
        **kwargs: Any,
    ) -> str:
        if self._reuse_page:
-            return await self._fetch_with_reuse(url, wait_until, referer, **kwargs)
+            return await self._fetch_with_reuse(
+                url, wait_until, referer, delay, **kwargs
+            )
        else:
-            return await self._fetch_with_new(url, wait_until, referer, **kwargs)
+            return await self._fetch_with_new(url, wait_until, referer, delay, **kwargs)
 
    async def load_state(self) -> bool:
        """ """
@@ -282,11 +297,13 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
+        delay: float = 0.0,
        **kwargs: Any,
    ) -> str:
        page = await self.context.new_page()
        try:
            await page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
+            await asyncio.sleep(delay)
            html: str = await page.content()
            return html
        finally:
@@ -298,11 +315,13 @@ class BaseBrowser(FetcherProtocol, abc.ABC):
        wait_until: Literal["commit", "domcontentloaded", "load", "networkidle"]
        | None = "load",
        referer: str | None = None,
+        delay: float = 0.0,
        **kwargs: Any,
    ) -> str:
        if not self._page:
            self._page = await self.context.new_page()
        await self._page.goto(url, wait_until=wait_until, referer=referer, **kwargs)
+        await asyncio.sleep(delay)
        html: str = await self._page.content()
        return html
 
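The practical effect of the new delay parameter, as a hedged sketch (not part of the package); `fetcher` stands for any already-initialized BaseBrowser subclass such as the site browsers listed above, and is not constructed here.

    from novel_downloader.core.fetchers.base import BaseBrowser

    async def fetch_rendered(fetcher: BaseBrowser, url: str) -> str:
        # delay=1.0 inserts `await asyncio.sleep(1.0)` between page.goto() and
        # page.content(), giving client-side rendering a moment to settle.
        return await fetcher.fetch(url, wait_until="networkidle", delay=1.0)
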
novel_downloader/core/fetchers/base/session.py

@@ -21,12 +21,14 @@ from aiohttp import ClientResponse, ClientSession, ClientTimeout, TCPConnector
 
 from novel_downloader.core.interfaces import FetcherProtocol
 from novel_downloader.models import FetcherConfig, LoginField
+from novel_downloader.utils import (
+    async_sleep_with_random_delay,
+    parse_cookie_expires,
+)
 from novel_downloader.utils.constants import (
     DATA_DIR,
     DEFAULT_USER_HEADERS,
 )
-from novel_downloader.utils.cookies import parse_cookie_expires
-from novel_downloader.utils.time_utils import async_sleep_with_random_delay
 
 from .rate_limiter import TokenBucketRateLimiter
 
@@ -156,7 +158,12 @@ class BaseSession(FetcherProtocol, abc.ABC):
        await self._session.close()
        self._session = None
 
-    async def fetch(self, url: str, **kwargs: Any) -> str:
+    async def fetch(
+        self,
+        url: str,
+        encoding: str | None = None,
+        **kwargs: Any,
+    ) -> str:
        """
        Fetch the content from the given URL asynchronously, with retry support.
 
@@ -172,8 +179,7 @@ class BaseSession(FetcherProtocol, abc.ABC):
            try:
                async with self.session.get(url, **kwargs) as resp:
                    resp.raise_for_status()
-                    text: str = await resp.text()
-                    return text
+                    return await self._response_to_str(resp, encoding)
            except aiohttp.ClientError:
                if attempt < self.retry_times:
                    await async_sleep_with_random_delay(
@@ -405,6 +411,25 @@ class BaseSession(FetcherProtocol, abc.ABC):
            return dict(self._session.headers)
        return self._headers.copy()
 
+    @staticmethod
+    async def _response_to_str(
+        resp: ClientResponse,
+        encoding: str | None = None,
+    ) -> str:
+        """
+        Read the full body of resp as text. First try the declared charset,
+        then on UnicodeDecodeError fall back to a lenient utf-8 decode.
+        """
+        data: bytes = await resp.read()
+        encodings = [encoding, resp.charset, "utf-8", "gb18030", "gbk"]
+        encodings_list: list[str] = [e for e in encodings if e]
+        for enc in encodings_list:
+            try:
+                return data.decode(enc)
+            except UnicodeDecodeError:
+                continue
+        return data.decode("utf-8", errors="ignore")
+
    async def __aenter__(self) -> Self:
        if self._session is None or self._session.closed:
            await self.init()
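The decode chain in `_response_to_str` can be read in isolation; a standalone sketch of the same fallback logic (the sample bytes and the helper name are made up for illustration):

    def decode_fallback(data: bytes, declared: str | None = None) -> str:
        # Try the caller-supplied encoding first, then utf-8 / gb18030 / gbk,
        # and finally a lossy utf-8 decode, mirroring the method above.
        for enc in [e for e in (declared, "utf-8", "gb18030", "gbk") if e]:
            try:
                return data.decode(enc)
            except UnicodeDecodeError:
                continue
        return data.decode("utf-8", errors="ignore")

    print(decode_fallback("简体中文".encode("gb18030")))  # utf-8 fails, gb18030 succeeds
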
novel_downloader/core/fetchers/biquge/__init__.py

@@ -5,10 +5,10 @@ novel_downloader.core.fetchers.biquge
 
 """
 
-from .browser import BiqugeBrowser
-from .session import BiqugeSession
-
 __all__ = [
     "BiqugeBrowser",
     "BiqugeSession",
 ]
+
+from .browser import BiqugeBrowser
+from .session import BiqugeSession