PyPI - novel-downloader - Versions diffs - 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (165) hide show

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +2 -2
novel_downloader/cli/config.py +1 -83
novel_downloader/cli/download.py +4 -5
novel_downloader/cli/export.py +4 -1
novel_downloader/cli/main.py +2 -0
novel_downloader/cli/search.py +123 -0
novel_downloader/config/__init__.py +3 -10
novel_downloader/config/adapter.py +190 -54
novel_downloader/config/loader.py +2 -3
novel_downloader/core/__init__.py +13 -13
novel_downloader/core/downloaders/__init__.py +10 -11
novel_downloader/core/downloaders/base.py +152 -26
novel_downloader/core/downloaders/biquge.py +5 -1
novel_downloader/core/downloaders/common.py +157 -378
novel_downloader/core/downloaders/esjzone.py +5 -1
novel_downloader/core/downloaders/linovelib.py +5 -1
novel_downloader/core/downloaders/qianbi.py +291 -4
novel_downloader/core/downloaders/qidian.py +199 -285
novel_downloader/core/downloaders/registry.py +67 -0
novel_downloader/core/downloaders/sfacg.py +5 -1
novel_downloader/core/downloaders/yamibo.py +5 -1
novel_downloader/core/exporters/__init__.py +10 -11
novel_downloader/core/exporters/base.py +87 -7
novel_downloader/core/exporters/biquge.py +5 -8
novel_downloader/core/exporters/common/__init__.py +2 -2
novel_downloader/core/exporters/common/epub.py +82 -166
novel_downloader/core/exporters/common/main_exporter.py +0 -60
novel_downloader/core/exporters/common/txt.py +82 -83
novel_downloader/core/exporters/epub_util.py +157 -1330
novel_downloader/core/exporters/esjzone.py +5 -8
novel_downloader/core/exporters/linovelib/__init__.py +2 -2
novel_downloader/core/exporters/linovelib/epub.py +157 -212
novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
novel_downloader/core/exporters/linovelib/txt.py +67 -63
novel_downloader/core/exporters/qianbi.py +5 -8
novel_downloader/core/exporters/qidian.py +14 -4
novel_downloader/core/exporters/registry.py +53 -0
novel_downloader/core/exporters/sfacg.py +5 -8
novel_downloader/core/exporters/txt_util.py +67 -0
novel_downloader/core/exporters/yamibo.py +5 -8
novel_downloader/core/fetchers/__init__.py +19 -24
novel_downloader/core/fetchers/base/__init__.py +3 -3
novel_downloader/core/fetchers/base/browser.py +23 -4
novel_downloader/core/fetchers/base/session.py +30 -5
novel_downloader/core/fetchers/biquge/__init__.py +3 -3
novel_downloader/core/fetchers/biquge/browser.py +5 -0
novel_downloader/core/fetchers/biquge/session.py +6 -1
novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
novel_downloader/core/fetchers/esjzone/browser.py +5 -0
novel_downloader/core/fetchers/esjzone/session.py +6 -1
novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
novel_downloader/core/fetchers/linovelib/browser.py +6 -1
novel_downloader/core/fetchers/linovelib/session.py +6 -1
novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
novel_downloader/core/fetchers/qianbi/browser.py +5 -0
novel_downloader/core/fetchers/qianbi/session.py +5 -0
novel_downloader/core/fetchers/qidian/__init__.py +3 -3
novel_downloader/core/fetchers/qidian/browser.py +12 -4
novel_downloader/core/fetchers/qidian/session.py +11 -3
novel_downloader/core/fetchers/registry.py +71 -0
novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
novel_downloader/core/fetchers/sfacg/browser.py +5 -0
novel_downloader/core/fetchers/sfacg/session.py +5 -0
novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
novel_downloader/core/fetchers/yamibo/browser.py +5 -0
novel_downloader/core/fetchers/yamibo/session.py +6 -1
novel_downloader/core/interfaces/__init__.py +7 -5
novel_downloader/core/interfaces/searcher.py +18 -0
novel_downloader/core/parsers/__init__.py +10 -11
novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
novel_downloader/core/parsers/qidian/__init__.py +2 -2
novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
novel_downloader/core/parsers/qidian/main_parser.py +10 -21
novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
novel_downloader/core/parsers/registry.py +68 -0
novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
novel_downloader/core/searchers/__init__.py +20 -0
novel_downloader/core/searchers/base.py +92 -0
novel_downloader/core/searchers/biquge.py +83 -0
novel_downloader/core/searchers/esjzone.py +84 -0
novel_downloader/core/searchers/qianbi.py +131 -0
novel_downloader/core/searchers/qidian.py +87 -0
novel_downloader/core/searchers/registry.py +63 -0
novel_downloader/locales/en.json +12 -4
novel_downloader/locales/zh.json +12 -4
novel_downloader/models/__init__.py +4 -30
novel_downloader/models/config.py +12 -6
novel_downloader/models/search.py +16 -0
novel_downloader/models/types.py +0 -2
novel_downloader/resources/config/settings.toml +31 -4
novel_downloader/resources/css_styles/intro.css +83 -0
novel_downloader/resources/css_styles/main.css +30 -89
novel_downloader/utils/__init__.py +52 -0
novel_downloader/utils/chapter_storage.py +244 -224
novel_downloader/utils/constants.py +1 -21
novel_downloader/utils/epub/__init__.py +34 -0
novel_downloader/utils/epub/builder.py +377 -0
novel_downloader/utils/epub/constants.py +77 -0
novel_downloader/utils/epub/documents.py +403 -0
novel_downloader/utils/epub/models.py +134 -0
novel_downloader/utils/epub/utils.py +212 -0
novel_downloader/utils/file_utils/__init__.py +10 -14
novel_downloader/utils/file_utils/io.py +20 -51
novel_downloader/utils/file_utils/normalize.py +2 -2
novel_downloader/utils/file_utils/sanitize.py +2 -3
novel_downloader/utils/fontocr/__init__.py +5 -5
novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
novel_downloader/utils/fontocr/ocr_v1.py +13 -1
novel_downloader/utils/fontocr/ocr_v2.py +13 -1
novel_downloader/utils/fontocr/ocr_v3.py +744 -0
novel_downloader/utils/i18n.py +2 -0
novel_downloader/utils/logger.py +2 -0
novel_downloader/utils/network.py +110 -251
novel_downloader/utils/state.py +1 -0
novel_downloader/utils/text_utils/__init__.py +18 -17
novel_downloader/utils/text_utils/diff_display.py +4 -5
novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
novel_downloader/utils/text_utils/text_cleaner.py +179 -0
novel_downloader/utils/text_utils/truncate_utils.py +62 -0
novel_downloader/utils/time_utils/__init__.py +3 -3
novel_downloader/utils/time_utils/datetime_utils.py +4 -5
novel_downloader/utils/time_utils/sleep_utils.py +2 -3
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
novel_downloader-1.5.0.dist-info/RECORD +164 -0
novel_downloader/config/site_rules.py +0 -94
novel_downloader/core/factory/__init__.py +0 -20
novel_downloader/core/factory/downloader.py +0 -73
novel_downloader/core/factory/exporter.py +0 -58
novel_downloader/core/factory/fetcher.py +0 -96
novel_downloader/core/factory/parser.py +0 -86
novel_downloader/core/fetchers/common/__init__.py +0 -14
novel_downloader/core/fetchers/common/browser.py +0 -79
novel_downloader/core/fetchers/common/session.py +0 -79
novel_downloader/core/parsers/biquge/__init__.py +0 -10
novel_downloader/core/parsers/common/__init__.py +0 -13
novel_downloader/core/parsers/common/helper.py +0 -323
novel_downloader/core/parsers/common/main_parser.py +0 -106
novel_downloader/core/parsers/esjzone/__init__.py +0 -10
novel_downloader/core/parsers/linovelib/__init__.py +0 -10
novel_downloader/core/parsers/qianbi/__init__.py +0 -10
novel_downloader/core/parsers/sfacg/__init__.py +0 -10
novel_downloader/core/parsers/yamibo/__init__.py +0 -10
novel_downloader/models/browser.py +0 -21
novel_downloader/models/site_rules.py +0 -99
novel_downloader/models/tasks.py +0 -33
novel_downloader/resources/css_styles/volume-intro.css +0 -56
novel_downloader/resources/json/replace_word_map.json +0 -4
novel_downloader/resources/text/blacklist.txt +0 -22
novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
novel_downloader/utils/text_utils/font_mapping.py +0 -28
novel_downloader/utils/text_utils/text_cleaning.py +0 -107
novel_downloader-1.4.5.dist-info/RECORD +0 -165
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0

novel_downloader/config/adapter.py CHANGED Viewed

@@ -7,67 +7,53 @@ Defines ConfigAdapter, which maps a raw configuration dictionary and
 site name into structured dataclass-based config models.
 """
-from typing import Any
+import json
+from typing import Any, cast
 from novel_downloader.models import (
     BookConfig,
     DownloaderConfig,
     ExporterConfig,
     FetcherConfig,
+    LogLevel,
     ParserConfig,
+    TextCleanerConfig,
 )
-from novel_downloader.utils.constants import SUPPORTED_SITES
-from .site_rules import load_site_rules
 class ConfigAdapter:
     """
-    Adapter to map a raw config dict + site name into structured dataclass configs.
+    Adapter to map a raw configuration dictionary and site name
+    into structured dataclass configuration models.
     """
+    _ALLOWED_LOG_LEVELS: tuple[LogLevel, ...] = (
+        "DEBUG",
+        "INFO",
+        "WARNING",
+        "ERROR",
+    )
     def __init__(self, config: dict[str, Any], site: str):
         """
-        :param config: 完整加载的配置 dict
-        :param site:   当前站点名称 (e.g. "qidian")
+        Initialize the adapter.
+        :param config: The fully loaded configuration dictionary.
+        :param site:   The current site name (e.g. "qidian").
         """
         self._config = config
         self._site = site
-        site_rules = load_site_rules()  # -> Dict[str, SiteRules]
-        self._supported_sites = set(site_rules.keys()) | SUPPORTED_SITES
-    @property
-    def site(self) -> str:
-        return self._site
-    @site.setter
-    def site(self, value: str) -> None:
-        self._site = value
-    def _get_site_cfg(self, site: str | None = None) -> dict[str, Any]:
-        """
-        获取指定站点的配置 (默认为当前适配站点)
-        1. 如果有 site-specific 配置, 优先返回它
-        2. 否则, 如果该站点在支持站点中, 尝试返回 'common' 配置
-        3. 否则返回空 dict
+    def get_fetcher_config(self) -> FetcherConfig:
         """
-        site = site or self._site
-        sites_cfg = self._config.get("sites", {}) or {}
-        if site in sites_cfg:
-            return sites_cfg[site] or {}
-        if site in self._supported_sites:
-            return sites_cfg.get("common", {}) or {}
+        Build a FetcherConfig from the raw configuration.
-        return {}
+        Reads from:
+          - config["general"] for global defaults (e.g. request_interval)
+          - config["requests"] for HTTP-specific settings (timeouts, retries, etc.)
+          - site-specific overrides under config["sites"][site]
-    def get_fetcher_config(self) -> FetcherConfig:
-        """
-        从 config["requests"] 中读取通用请求配置
-        返回 FetcherConfig 实例
+        :return: A FetcherConfig instance with all fields populated.
         """
         gen = self._config.get("general", {})
         req = self._config.get("requests", {})
@@ -91,8 +77,15 @@ class ConfigAdapter:
     def get_downloader_config(self) -> DownloaderConfig:
         """
-        从 config["general"] 和 config["sites"][site] 中读取下载器相关配置,
-        返回 DownloaderConfig 实例
+        Build a DownloaderConfig using both general and site-specific settings.
+        Reads from:
+          - config["general"] for download directories, worker counts, etc.
+          - config["requests"] for retry and backoff settings
+          - config["general"]["debug"] for debug toggles (e.g. save_html)
+          - config["sites"][site] for login credentials and mode
+        :return: A DownloaderConfig instance with all fields populated.
         """
         gen = self._config.get("general", {})
         req = self._config.get("requests", {})
@@ -104,13 +97,11 @@ class ConfigAdapter:
             backoff_factor=req.get("backoff_factor", 2.0),
             raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
             cache_dir=gen.get("cache_dir", "./novel_cache"),
-            download_workers=gen.get("download_workers", 2),
-            parser_workers=gen.get("parser_workers", 2),
+            workers=gen.get("workers", 2),
             skip_existing=gen.get("skip_existing", True),
             login_required=site_cfg.get("login_required", False),
             save_html=debug.get("save_html", False),
             mode=site_cfg.get("mode", "session"),
-            storage_backend=gen.get("storage_backend", "json"),
             storage_batch_size=gen.get("storage_batch_size", 1),
             username=site_cfg.get("username", ""),
             password=site_cfg.get("password", ""),
@@ -119,8 +110,14 @@ class ConfigAdapter:
     def get_parser_config(self) -> ParserConfig:
         """
-        从 config["general"]["cache_dir"]、config["general"]["debug"] 与
-        config["sites"][site] 中读取解析器相关配置, 返回 ParserConfig 实例
+        Build a ParserConfig from general, OCR, and site-specific settings.
+        Reads from:
+          - config["general"]["cache_dir"] for where to cache intermediate parses
+          - config["general"]["font_ocr"] for font-decoding and OCR options
+          - config["sites"][site] for parsing mode and truncation behavior
+        :return: A ParserConfig instance with all fields populated.
         """
         gen = self._config.get("general", {})
         font_ocr = gen.get("font_ocr", {})
@@ -144,20 +141,29 @@ class ConfigAdapter:
     def get_exporter_config(self) -> ExporterConfig:
         """
-        从 config["general"] 与 config["output"] 中读取存储器相关配置,
-        返回 ExporterConfig 实例
+        Build an ExporterConfig from output and general settings.
+        Reads from:
+          - config["general"] for cache and raw data directories
+          - config["output"]["formats"] for which formats to generate
+          - config["output"]["naming"] for filename templates
+          - config["output"]["epub"] for EPUB-specific options
+          - config["sites"][site] for export split mode
+        :return: An ExporterConfig instance with all fields populated.
         """
         gen = self._config.get("general", {})
         out = self._config.get("output", {})
+        cln = self._config.get("cleaner", {})
         fmt = out.get("formats", {})
         naming = out.get("naming", {})
         epub_opts = out.get("epub", {})
         site_cfg = self._get_site_cfg()
+        cleaner_cfg = self._dict_to_cleaner_cfg(cln)
         return ExporterConfig(
             cache_dir=gen.get("cache_dir", "./novel_cache"),
             raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
             output_dir=gen.get("output_dir", "./downloads"),
-            storage_backend=gen.get("storage_backend", "json"),
             clean_text=out.get("clean_text", True),
             make_txt=fmt.get("make_txt", True),
             make_epub=fmt.get("make_epub", False),
@@ -169,11 +175,20 @@ class ConfigAdapter:
             include_toc=epub_opts.get("include_toc", False),
             include_picture=epub_opts.get("include_picture", False),
             split_mode=site_cfg.get("split_mode", "book"),
+            cleaner_cfg=cleaner_cfg,
         )
     def get_book_ids(self) -> list[BookConfig]:
         """
-        从 config["sites"][site]["book_ids"] 中提取目标书籍列表
+        Extract the list of target books from the site configuration.
+        The site config may specify book_ids as:
+          - a single string or integer
+          - a dict with book_id and optional start_id, end_id, ignore_ids
+          - a list of the above types
+        :return: A list of BookConfig dicts.
+        :raises ValueError: if the raw book_ids is neither a str/int, dict, nor list.
         """
         site_cfg = self._get_site_cfg()
         raw = site_cfg.get("book_ids", [])
@@ -182,7 +197,7 @@ class ConfigAdapter:
             return [{"book_id": str(raw)}]
         if isinstance(raw, dict):
-            return [self._dict_to_book_config(raw)]
+            return [self._dict_to_book_cfg(raw)]
         if not isinstance(raw, list):
             raise ValueError(
@@ -195,17 +210,71 @@ class ConfigAdapter:
                 if isinstance(item, str | int):
                     result.append({"book_id": str(item)})
                 elif isinstance(item, dict):
-                    result.append(self._dict_to_book_config(item))
+                    result.append(self._dict_to_book_cfg(item))
             except ValueError:
                 continue
         return result
+    def get_log_level(self) -> LogLevel:
+        """
+        Retrieve the logging level from [general.debug].
+        Reads from config["general"]["debug"]["log_level"], defaulting to "INFO"
+        if not set or invalid.
+        :return: The configured LogLevel literal ("DEBUG", "INFO", "WARNING", "ERROR").
+        """
+        debug_cfg = self._config.get("general", {}).get("debug", {})
+        raw = debug_cfg.get("log_level") or "INFO"
+        if raw in self._ALLOWED_LOG_LEVELS:
+            return cast(LogLevel, raw)
+        return "INFO"
+    @property
+    def site(self) -> str:
+        """
+        Get the current site name.
+        """
+        return self._site
+    @site.setter
+    def site(self, value: str) -> None:
+        """
+        Set a new site name for configuration lookups.
+        :param value: The new site key in config["sites"] to use.
+        """
+        self._site = value
+    def _get_site_cfg(self, site: str | None = None) -> dict[str, Any]:
+        """
+        Retrieve the configuration for a specific site.
+        Lookup order:
+          1. If there is a site-specific entry under config["sites"], return that.
+          2. Otherwise, if a "common" entry exists under config["sites"], return that.
+          3. If neither is present, return an empty dict.
+        :param site: Optional override of the site name; defaults to self._site.
+        :return: The site-specific or common configuration dict.
+        """
+        site = site or self._site
+        sites_cfg = self._config.get("sites", {}) or {}
+        if site in sites_cfg:
+            return sites_cfg[site] or {}
+        return sites_cfg.get("common", {}) or {}
     @staticmethod
-    def _dict_to_book_config(data: dict[str, Any]) -> BookConfig:
+    def _dict_to_book_cfg(data: dict[str, Any]) -> BookConfig:
         """
-        Converts a dict to BookConfig with type normalization.
-        Raises ValueError if 'book_id' is missing.
+        Convert a dictionary to a BookConfig with normalized types.
+        :param data: A dict that must contain at least "book_id".
+        :return: A BookConfig dict with all values cast to strings or lists of strings.
+        :raises ValueError: if the "book_id" field is missing.
         """
         if "book_id" not in data:
             raise ValueError("Missing required field 'book_id'")
@@ -222,3 +291,70 @@ class ConfigAdapter:
             result["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
         return result
+    @classmethod
+    def _dict_to_cleaner_cfg(cls, cfg: dict[str, Any]) -> TextCleanerConfig:
+        """
+        Convert a nested dict of title/content rules into a TextCleanerConfig.
+        :param cfg: configuration dictionary
+        :return: fully constructed TextCleanerConfig
+        """
+        # Title rules
+        title_section = cfg.get("title", {})
+        title_remove = title_section.get("remove_patterns", [])
+        title_repl = title_section.get("replace", {})
+        title_ext = title_section.get("external", {})
+        title_ext_en = title_ext.get("enabled", False)
+        title_ext_rm_p = title_ext.get("remove_patterns", "")
+        title_ext_rp_p = title_ext.get("replace", "")
+        if title_ext_en:
+            title_remove_ext = cls._load_str_list(title_ext_rm_p)
+            title_remove += title_remove_ext
+            title_repl_ext = cls._load_str_dict(title_ext_rp_p)
+            title_repl = {**title_repl, **title_repl_ext}
+        # Content rules
+        content_section = cfg.get("content", {})
+        content_remove = content_section.get("remove_patterns", [])
+        content_repl = content_section.get("replace", {})
+        content_ext = content_section.get("external", {})
+        content_ext_en = content_ext.get("enabled", False)
+        content_ext_rm_p = content_ext.get("remove_patterns", "")
+        content_ext_rp_p = content_ext.get("replace", "")
+        if content_ext_en:
+            content_remove_ext = cls._load_str_list(content_ext_rm_p)
+            content_remove += content_remove_ext
+            content_repl_ext = cls._load_str_dict(content_ext_rp_p)
+            content_repl = {**content_repl, **content_repl_ext}
+        return TextCleanerConfig(
+            remove_invisible=cfg.get("remove_invisible", True),
+            title_remove_patterns=title_remove,
+            title_replacements=title_repl,
+            content_remove_patterns=content_remove,
+            content_replacements=content_repl,
+        )
+    @staticmethod
+    def _load_str_list(path: str) -> list[str]:
+        try:
+            with open(path, encoding="utf-8") as f:
+                parsed = json.load(f)
+            return cast(list[str], parsed)
+        except Exception:
+            return []
+    @staticmethod
+    def _load_str_dict(path: str) -> dict[str, str]:
+        try:
+            with open(path, encoding="utf-8") as f:
+                parsed = json.load(f)
+            return cast(dict[str, str], parsed)
+        except Exception:
+            return {}

novel_downloader/config/loader.py CHANGED Viewed

@@ -7,6 +7,8 @@ Provides functionality to load Toml configuration files into Python
 dictionaries, with robust error handling and fallback support.
 """
+__all__ = ["load_config"]
 import json
 import logging
 from pathlib import Path
@@ -180,6 +182,3 @@ def save_config_file(
     logger.info("[config] Configuration successfully saved to JSON: %s", output)
     return
-__all__ = ["load_config"]

novel_downloader/core/__init__.py CHANGED Viewed

@@ -14,26 +14,26 @@ downloading and processing online novel content, including:
 - Exporter: Responsible for exporting downloaded data into various output formats.
 """
-from .factory import (
-    get_downloader,
-    get_exporter,
-    get_fetcher,
-    get_parser,
-)
-from .interfaces import (
-    DownloaderProtocol,
-    ExporterProtocol,
-    FetcherProtocol,
-    ParserProtocol,
-)
 __all__ = [
     "get_downloader",
     "get_exporter",
     "get_fetcher",
     "get_parser",
+    "search",
     "DownloaderProtocol",
     "ExporterProtocol",
     "FetcherProtocol",
     "ParserProtocol",
 ]
+from .downloaders import get_downloader
+from .exporters import get_exporter
+from .fetchers import get_fetcher
+from .interfaces import (
+    DownloaderProtocol,
+    ExporterProtocol,
+    FetcherProtocol,
+    ParserProtocol,
+)
+from .parsers import get_parser
+from .searchers import search

novel_downloader/core/downloaders/__init__.py CHANGED Viewed

@@ -17,19 +17,10 @@ Currently supported platforms:
 - qidian (起点中文网)
 - sfacg (SF轻小说)
 - yamibo (百合会)
-- common (通用架构)
 """
-from .biquge import BiqugeDownloader
-from .common import CommonDownloader
-from .esjzone import EsjzoneDownloader
-from .linovelib import LinovelibDownloader
-from .qianbi import QianbiDownloader
-from .qidian import QidianDownloader
-from .sfacg import SfacgDownloader
-from .yamibo import YamiboDownloader
 __all__ = [
+    "get_downloader",
     "BiqugeDownloader",
     "EsjzoneDownloader",
     "LinovelibDownloader",
@@ -37,5 +28,13 @@ __all__ = [
     "QidianDownloader",
     "SfacgDownloader",
     "YamiboDownloader",
-    "CommonDownloader",
 ]
+from .biquge import BiqugeDownloader
+from .esjzone import EsjzoneDownloader
+from .linovelib import LinovelibDownloader
+from .qianbi import QianbiDownloader
+from .qidian import QidianDownloader
+from .registry import get_downloader
+from .sfacg import SfacgDownloader
+from .yamibo import YamiboDownloader

novel_downloader/core/downloaders/base.py CHANGED Viewed

@@ -8,8 +8,9 @@ common interface and reusable logic for all downloader implementations.
 """
 import abc
+import json
 import logging
-from collections.abc import Awaitable, Callable
+from collections.abc import AsyncIterator, Awaitable, Callable, Sequence
 from pathlib import Path
 from typing import Any
@@ -19,32 +20,54 @@ from novel_downloader.core.interfaces import (
     ParserProtocol,
 )
 from novel_downloader.models import BookConfig, DownloaderConfig
+from novel_downloader.utils import calculate_time_difference
 class BaseDownloader(DownloaderProtocol, abc.ABC):
     """
-    Abstract downloader that defines the initialization interface
-    and the general batch download flow.
+    Abstract base class for novel downloaders.
-    Subclasses must implement the logic for downloading a single book.
+    Defines the general interface and batch download workflow,
+    while delegating book-specific downloading logic to subclasses.
+    Subclasses are required to implement methods for downloading
+    a single book, using the provided fetcher and parser components.
     """
+    DEFAULT_SOURCE_ID = 0
+    DEFAULT_PRIORITIES_MAP = {
+        DEFAULT_SOURCE_ID: 0,
+    }
     def __init__(
         self,
         fetcher: FetcherProtocol,
         parser: ParserProtocol,
         config: DownloaderConfig,
         site: str,
+        priorities: dict[int, int] | None = None,
     ):
+        """
+        Initialize the downloader for a specific site.
+        :param fetcher: Fetcher component for retrieving raw chapter data.
+        :param parser: Parser component for extracting chapter content.
+        :param config: Downloader configuration settings.
+        :param site: Identifier for the target website or source.
+        :param priorities: Mapping of source_id to priority value.
+                           Lower numbers indicate higher priority.
+                           E.g. {0: 10, 1: 100} means source 0 is preferred.
+        """
         self._fetcher = fetcher
         self._parser = parser
         self._config = config
         self._site = site
+        self._priorities = priorities or self.DEFAULT_PRIORITIES_MAP
         self._raw_data_dir = Path(config.raw_data_dir) / site
-        self._cache_dir = Path(config.cache_dir) / site
         self._raw_data_dir.mkdir(parents=True, exist_ok=True)
-        self._cache_dir.mkdir(parents=True, exist_ok=True)
+        self._debug_dir = Path.cwd() / "debug" / site
+        self._debug_dir.mkdir(parents=True, exist_ok=True)
         self.logger = logging.getLogger(f"{self.__class__.__name__}")
@@ -117,6 +140,28 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
         await self._finalize()
+    async def load_book_info(
+        self,
+        book_id: str,
+        html_dir: Path,
+    ) -> dict[str, Any]:
+        book_info = self._load_book_info(
+            book_id=book_id,
+            max_age_days=1,
+        )
+        if book_info:
+            return book_info
+        info_html = await self.fetcher.get_book_info(book_id)
+        self._save_html_pages(html_dir, "info", info_html)
+        book_info = self.parser.parse_book_info(info_html)
+        if book_info:
+            self._save_book_info(book_id, book_info)
+            return book_info
+        return self._load_book_info(book_id)
     @abc.abstractmethod
     async def _download_one(
         self,
@@ -147,29 +192,110 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
         """
         return
-    @property
-    def fetcher(self) -> FetcherProtocol:
-        return self._fetcher
+    def _load_book_info(
+        self,
+        book_id: str,
+        *,
+        max_age_days: int | None = None,
+    ) -> dict[str, Any]:
+        """
+        Attempt to read and parse the book_info.json for a given book_id.
-    @property
-    def parser(self) -> ParserProtocol:
-        return self._parser
+        :param book_id: identifier of the book
+        :param max_age_days: if set, only return data whose 'update_time' is
+                             at most this many days old
+        :return: dict of book info if the file is valid JSON, else an empty dict
+        """
+        info_path = self._raw_data_dir / book_id / "book_info.json"
+        if not info_path.is_file():
+            return {}
-    @property
-    def config(self) -> DownloaderConfig:
-        return self._config
+        try:
+            data: dict[str, Any] = json.loads(info_path.read_text(encoding="utf-8"))
+        except json.JSONDecodeError:
+            return {}
+        if max_age_days is not None:
+            days, *_ = calculate_time_difference(
+                data.get("update_time", ""),
+                "UTC+8",
+            )
+            if days > max_age_days:
+                return {}
-    @property
-    def raw_data_dir(self) -> Path:
-        return self._raw_data_dir
+        return data
+    def _save_book_info(
+        self,
+        book_id: str,
+        book_info: dict[str, Any],
+    ) -> None:
+        """
+        Serialize and save the book_info dict as json.
+        :param book_id: identifier of the book
+        :param book_info: dict containing metadata about the book
+        """
+        target_dir = self._raw_data_dir / book_id
+        target_dir.mkdir(parents=True, exist_ok=True)
+        (target_dir / "book_info.json").write_text(
+            json.dumps(book_info, ensure_ascii=False, indent=2),
+            encoding="utf-8",
+        )
+    def _save_html_pages(
+        self,
+        html_dir: Path,
+        filename: str,
+        html_list: Sequence[str],
+    ) -> None:
+        """
+        If save_html is enabled, write each HTML snippet to a file.
+        Filenames will be {filename}_{index}.html in html_dir.
+        :param html_dir: directory in which to write HTML files
+        :param filename: used as filename prefix
+        :param html_list: list of HTML strings to save
+        """
+        if not self.save_html:
+            return
+        html_dir.mkdir(parents=True, exist_ok=True)
+        for i, html in enumerate(html_list):
+            file_path = html_dir / f"{filename}_{i}.html"
+            file_path.write_text(html, encoding="utf-8")
+    @staticmethod
+    async def _chapter_ids(
+        volumes: list[dict[str, Any]],
+        start_id: str | None,
+        end_id: str | None,
+    ) -> AsyncIterator[str]:
+        """
+        Yield each chapterId in order, respecting start/end bounds.
+        """
+        seen_start = start_id is None
+        for vol in volumes:
+            for chap in vol.get("chapters", []):
+                cid = chap.get("chapterId")
+                if not cid:
+                    continue
+                if not seen_start:
+                    if cid == start_id:
+                        seen_start = True
+                    else:
+                        continue
+                yield cid
+                if end_id is not None and cid == end_id:
+                    return
     @property
-    def cache_dir(self) -> Path:
-        return self._cache_dir
+    def fetcher(self) -> FetcherProtocol:
+        return self._fetcher
     @property
-    def site(self) -> str:
-        return self._site
+    def parser(self) -> ParserProtocol:
+        return self._parser
     @property
     def save_html(self) -> bool:
@@ -196,12 +322,12 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
         return self._config.backoff_factor
     @property
-    def parser_workers(self) -> int:
-        return self._config.parser_workers
+    def workers(self) -> int:
+        return self._config.workers
     @property
-    def download_workers(self) -> int:
-        return self._config.download_workers
+    def storage_batch_size(self) -> int:
+        return max(1, self._config.storage_batch_size)
     def _handle_download_exception(self, book: BookConfig, error: Exception) -> None:
         """

novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl