PyPI - novel-downloader - Versions diffs - 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (165)

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +2 -2
novel_downloader/cli/config.py +1 -83
novel_downloader/cli/download.py +4 -5
novel_downloader/cli/export.py +4 -1
novel_downloader/cli/main.py +2 -0
novel_downloader/cli/search.py +123 -0
novel_downloader/config/__init__.py +3 -10
novel_downloader/config/adapter.py +190 -54
novel_downloader/config/loader.py +2 -3
novel_downloader/core/__init__.py +13 -13
novel_downloader/core/downloaders/__init__.py +10 -11
novel_downloader/core/downloaders/base.py +152 -26
novel_downloader/core/downloaders/biquge.py +5 -1
novel_downloader/core/downloaders/common.py +157 -378
novel_downloader/core/downloaders/esjzone.py +5 -1
novel_downloader/core/downloaders/linovelib.py +5 -1
novel_downloader/core/downloaders/qianbi.py +291 -4
novel_downloader/core/downloaders/qidian.py +199 -285
novel_downloader/core/downloaders/registry.py +67 -0
novel_downloader/core/downloaders/sfacg.py +5 -1
novel_downloader/core/downloaders/yamibo.py +5 -1
novel_downloader/core/exporters/__init__.py +10 -11
novel_downloader/core/exporters/base.py +87 -7
novel_downloader/core/exporters/biquge.py +5 -8
novel_downloader/core/exporters/common/__init__.py +2 -2
novel_downloader/core/exporters/common/epub.py +82 -166
novel_downloader/core/exporters/common/main_exporter.py +0 -60
novel_downloader/core/exporters/common/txt.py +82 -83
novel_downloader/core/exporters/epub_util.py +157 -1330
novel_downloader/core/exporters/esjzone.py +5 -8
novel_downloader/core/exporters/linovelib/__init__.py +2 -2
novel_downloader/core/exporters/linovelib/epub.py +157 -212
novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
novel_downloader/core/exporters/linovelib/txt.py +67 -63
novel_downloader/core/exporters/qianbi.py +5 -8
novel_downloader/core/exporters/qidian.py +14 -4
novel_downloader/core/exporters/registry.py +53 -0
novel_downloader/core/exporters/sfacg.py +5 -8
novel_downloader/core/exporters/txt_util.py +67 -0
novel_downloader/core/exporters/yamibo.py +5 -8
novel_downloader/core/fetchers/__init__.py +19 -24
novel_downloader/core/fetchers/base/__init__.py +3 -3
novel_downloader/core/fetchers/base/browser.py +23 -4
novel_downloader/core/fetchers/base/session.py +30 -5
novel_downloader/core/fetchers/biquge/__init__.py +3 -3
novel_downloader/core/fetchers/biquge/browser.py +5 -0
novel_downloader/core/fetchers/biquge/session.py +6 -1
novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
novel_downloader/core/fetchers/esjzone/browser.py +5 -0
novel_downloader/core/fetchers/esjzone/session.py +6 -1
novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
novel_downloader/core/fetchers/linovelib/browser.py +6 -1
novel_downloader/core/fetchers/linovelib/session.py +6 -1
novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
novel_downloader/core/fetchers/qianbi/browser.py +5 -0
novel_downloader/core/fetchers/qianbi/session.py +5 -0
novel_downloader/core/fetchers/qidian/__init__.py +3 -3
novel_downloader/core/fetchers/qidian/browser.py +12 -4
novel_downloader/core/fetchers/qidian/session.py +11 -3
novel_downloader/core/fetchers/registry.py +71 -0
novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
novel_downloader/core/fetchers/sfacg/browser.py +5 -0
novel_downloader/core/fetchers/sfacg/session.py +5 -0
novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
novel_downloader/core/fetchers/yamibo/browser.py +5 -0
novel_downloader/core/fetchers/yamibo/session.py +6 -1
novel_downloader/core/interfaces/__init__.py +7 -5
novel_downloader/core/interfaces/searcher.py +18 -0
novel_downloader/core/parsers/__init__.py +10 -11
novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
novel_downloader/core/parsers/qidian/__init__.py +2 -2
novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
novel_downloader/core/parsers/qidian/main_parser.py +10 -21
novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
novel_downloader/core/parsers/registry.py +68 -0
novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
novel_downloader/core/searchers/__init__.py +20 -0
novel_downloader/core/searchers/base.py +92 -0
novel_downloader/core/searchers/biquge.py +83 -0
novel_downloader/core/searchers/esjzone.py +84 -0
novel_downloader/core/searchers/qianbi.py +131 -0
novel_downloader/core/searchers/qidian.py +87 -0
novel_downloader/core/searchers/registry.py +63 -0
novel_downloader/locales/en.json +12 -4
novel_downloader/locales/zh.json +12 -4
novel_downloader/models/__init__.py +4 -30
novel_downloader/models/config.py +12 -6
novel_downloader/models/search.py +16 -0
novel_downloader/models/types.py +0 -2
novel_downloader/resources/config/settings.toml +31 -4
novel_downloader/resources/css_styles/intro.css +83 -0
novel_downloader/resources/css_styles/main.css +30 -89
novel_downloader/utils/__init__.py +52 -0
novel_downloader/utils/chapter_storage.py +244 -224
novel_downloader/utils/constants.py +1 -21
novel_downloader/utils/epub/__init__.py +34 -0
novel_downloader/utils/epub/builder.py +377 -0
novel_downloader/utils/epub/constants.py +77 -0
novel_downloader/utils/epub/documents.py +403 -0
novel_downloader/utils/epub/models.py +134 -0
novel_downloader/utils/epub/utils.py +212 -0
novel_downloader/utils/file_utils/__init__.py +10 -14
novel_downloader/utils/file_utils/io.py +20 -51
novel_downloader/utils/file_utils/normalize.py +2 -2
novel_downloader/utils/file_utils/sanitize.py +2 -3
novel_downloader/utils/fontocr/__init__.py +5 -5
novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
novel_downloader/utils/fontocr/ocr_v1.py +13 -1
novel_downloader/utils/fontocr/ocr_v2.py +13 -1
novel_downloader/utils/fontocr/ocr_v3.py +744 -0
novel_downloader/utils/i18n.py +2 -0
novel_downloader/utils/logger.py +2 -0
novel_downloader/utils/network.py +110 -251
novel_downloader/utils/state.py +1 -0
novel_downloader/utils/text_utils/__init__.py +18 -17
novel_downloader/utils/text_utils/diff_display.py +4 -5
novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
novel_downloader/utils/text_utils/text_cleaner.py +179 -0
novel_downloader/utils/text_utils/truncate_utils.py +62 -0
novel_downloader/utils/time_utils/__init__.py +3 -3
novel_downloader/utils/time_utils/datetime_utils.py +4 -5
novel_downloader/utils/time_utils/sleep_utils.py +2 -3
{novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
novel_downloader-1.5.0.dist-info/RECORD +164 -0
novel_downloader/config/site_rules.py +0 -94
novel_downloader/core/factory/__init__.py +0 -20
novel_downloader/core/factory/downloader.py +0 -73
novel_downloader/core/factory/exporter.py +0 -58
novel_downloader/core/factory/fetcher.py +0 -96
novel_downloader/core/factory/parser.py +0 -86
novel_downloader/core/fetchers/common/__init__.py +0 -14
novel_downloader/core/fetchers/common/browser.py +0 -79
novel_downloader/core/fetchers/common/session.py +0 -79
novel_downloader/core/parsers/biquge/__init__.py +0 -10
novel_downloader/core/parsers/common/__init__.py +0 -13
novel_downloader/core/parsers/common/helper.py +0 -323
novel_downloader/core/parsers/common/main_parser.py +0 -106
novel_downloader/core/parsers/esjzone/__init__.py +0 -10
novel_downloader/core/parsers/linovelib/__init__.py +0 -10
novel_downloader/core/parsers/qianbi/__init__.py +0 -10
novel_downloader/core/parsers/sfacg/__init__.py +0 -10
novel_downloader/core/parsers/yamibo/__init__.py +0 -10
novel_downloader/models/browser.py +0 -21
novel_downloader/models/site_rules.py +0 -99
novel_downloader/models/tasks.py +0 -33
novel_downloader/resources/css_styles/volume-intro.css +0 -56
novel_downloader/resources/json/replace_word_map.json +0 -4
novel_downloader/resources/text/blacklist.txt +0 -22
novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
novel_downloader/utils/text_utils/font_mapping.py +0 -28
novel_downloader/utils/text_utils/text_cleaning.py +0 -107
novel_downloader-1.4.4.dist-info/RECORD +0 -165
{novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
{novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/fetchers/common/browser.py DELETED Viewed

@@ -1,79 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.fetchers.common.browser
----------------------------------------------
-"""
-from typing import Any
-from novel_downloader.core.fetchers.base import BaseBrowser
-from novel_downloader.models import FetcherConfig, SiteProfile
-class CommonBrowser(BaseBrowser):
-    """
-    A common async browser for handling site-specific HTTP requests.
-    """
-    def __init__(
-        self,
-        site: str,
-        profile: SiteProfile,
-        config: FetcherConfig,
-        reuse_page: bool = False,
-        **kwargs: Any,
-    ) -> None:
-        super().__init__(site, config, reuse_page, **kwargs)
-        self._profile = profile
-    async def get_book_info(
-        self,
-        book_id: str,
-        **kwargs: Any,
-    ) -> list[str]:
-        """
-        Fetch the raw HTML of the book info page asynchronously.
-        :param book_id: The book identifier.
-        :return: The page content as a string.
-        """
-        url = self.book_info_url(book_id=book_id)
-        return [await self.fetch(url, **kwargs)]
-    async def get_book_chapter(
-        self,
-        book_id: str,
-        chapter_id: str,
-        **kwargs: Any,
-    ) -> list[str]:
-        """
-        Fetch the raw HTML of a single chapter asynchronously.
-        :param book_id: The book identifier.
-        :param chapter_id: The chapter identifier.
-        :return: The chapter content as a string.
-        """
-        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
-        return [await self.fetch(url, **kwargs)]
-    def book_info_url(self, book_id: str) -> str:
-        """
-        Construct the URL for fetching a book's info page.
-        :param book_id: The identifier of the book.
-        :return: Fully qualified URL for the book info page.
-        """
-        return self._profile["book_info_url"].format(book_id=book_id)
-    def chapter_url(self, book_id: str, chapter_id: str) -> str:
-        """
-        Construct the URL for fetching a specific chapter.
-        :param book_id: The identifier of the book.
-        :param chapter_id: The identifier of the chapter.
-        :return: Fully qualified chapter URL.
-        """
-        return self._profile["chapter_url"].format(
-            book_id=book_id, chapter_id=chapter_id
-        )

novel_downloader/core/fetchers/common/session.py DELETED Viewed

@@ -1,79 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.fetchers.common.session
----------------------------------------------
-"""
-from typing import Any
-from novel_downloader.core.fetchers.base import BaseSession
-from novel_downloader.models import FetcherConfig, SiteProfile
-class CommonSession(BaseSession):
-    """
-    A common async session for handling site-specific HTTP requests.
-    """
-    def __init__(
-        self,
-        site: str,
-        profile: SiteProfile,
-        config: FetcherConfig,
-        cookies: dict[str, str] | None = None,
-        **kwargs: Any,
-    ) -> None:
-        super().__init__(site, config, cookies, **kwargs)
-        self._profile = profile
-    async def get_book_info(
-        self,
-        book_id: str,
-        **kwargs: Any,
-    ) -> list[str]:
-        """
-        Fetch the raw HTML of the book info page asynchronously.
-        :param book_id: The book identifier.
-        :return: The page content as a string.
-        """
-        url = self.book_info_url(book_id=book_id)
-        return [await self.fetch(url, **kwargs)]
-    async def get_book_chapter(
-        self,
-        book_id: str,
-        chapter_id: str,
-        **kwargs: Any,
-    ) -> list[str]:
-        """
-        Fetch the raw HTML of a single chapter asynchronously.
-        :param book_id: The book identifier.
-        :param chapter_id: The chapter identifier.
-        :return: The chapter content as a string.
-        """
-        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
-        return [await self.fetch(url, **kwargs)]
-    def book_info_url(self, book_id: str) -> str:
-        """
-        Construct the URL for fetching a book's info page.
-        :param book_id: The identifier of the book.
-        :return: Fully qualified URL for the book info page.
-        """
-        return self._profile["book_info_url"].format(book_id=book_id)
-    def chapter_url(self, book_id: str, chapter_id: str) -> str:
-        """
-        Construct the URL for fetching a specific chapter.
-        :param book_id: The identifier of the book.
-        :param chapter_id: The identifier of the chapter.
-        :return: Fully qualified chapter URL.
-        """
-        return self._profile["chapter_url"].format(
-            book_id=book_id, chapter_id=chapter_id
-        )

novel_downloader/core/parsers/biquge/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.biquge
-------------------------------------
-"""
-from .main_parser import BiqugeParser
-__all__ = ["BiqugeParser"]

novel_downloader/core/parsers/common/__init__.py DELETED Viewed

@@ -1,13 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.common
-------------------------------------
-This module provides a CommonParser class that implements
-general-purpose parsing logic for extracting novel metadata
-and chapter content based on site-specific rules.
-"""
-from .main_parser import CommonParser
-__all__ = ["CommonParser"]

novel_downloader/core/parsers/common/helper.py DELETED Viewed

@@ -1,323 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.common.helper
--------------------------------------------
-Shared utility functions for parsing Common pages.
-"""
-import logging
-import re
-from collections.abc import Iterable, Iterator
-from typing import Any, cast
-from bs4 import BeautifulSoup, Tag
-from novel_downloader.models import (
-    BookInfoRules,
-    FieldRules,
-    RuleStep,
-    VolumesRules,
-)
-logger = logging.getLogger(__name__)
-def html_to_soup(html_str: str) -> BeautifulSoup:
-    """
-    Convert an HTML string to a BeautifulSoup object with fallback.
-    :param html_str: Raw HTML string.
-    :return: Parsed BeautifulSoup object.
-    """
-    try:
-        return BeautifulSoup(html_str, "lxml")
-    except Exception as e:
-        logger.warning("[Parser] lxml parse failed, falling back: %s", e)
-        return BeautifulSoup(html_str, "html.parser")
-class HTMLExtractor:
-    """
-    HTML extraction engine that applies a sequence of RuleSteps to
-    pull data out of a page.
-    """
-    def __init__(self, html: str):
-        self._html = html
-        self._soup = html_to_soup(html)
-    def extract_book_info(self, rules: BookInfoRules) -> dict[str, Any]:
-        """
-        Extract structured book information from HTML according to the given rules.
-        Only non-empty fields in the rules are processed.
-        :param rules: Extraction configuration specifying how to extract.
-        :return: A dictionary containing extracted book information.
-        """
-        book_info: dict[str, Any] = {}
-        for field_name, field_rules in rules.items():
-            if field_rules is None:
-                continue
-            if field_name == "volumes":
-                book_info[field_name] = self.extract_volumes_structure(
-                    cast(VolumesRules, field_rules)
-                )
-            else:
-                steps = cast(FieldRules, field_rules)["steps"]
-                book_info[field_name] = self.extract_field(steps)
-        return book_info
-    def extract_field(self, steps: list[RuleStep]) -> str:
-        """
-        Execute a list of extraction steps on the given HTML.
-        - If any step yields None, stops processing further steps.
-        - At the end, always returns a str:
-        * If current is a list, converts items to text and joins with '\n'.
-        * If current is a Tag, extracts its .get_text().
-        * Else, uses str().
-        """
-        def flatten_list(items: Iterable[Any]) -> Iterator[Any]:
-            for item in items:
-                if isinstance(item, list):
-                    yield from flatten_list(item)
-                else:
-                    yield item
-        def to_text(item: Any) -> str:
-            if isinstance(item, Tag):
-                return str(item.get_text().strip())
-            return str(item).strip()
-        current: Any = self._soup
-        for step in steps:
-            t = step.get("type")
-            if t == "select_one":
-                sel = step.get("selector")
-                current = current.select_one(sel) if sel else None
-            elif t == "select":
-                sel = step.get("selector")
-                lst = current.select(sel) if sel else []
-                idx = step.get("index")
-                current = lst[idx] if idx is not None and idx < len(lst) else lst
-            elif t == "exclude":
-                sel = step.get("selector")
-                for elem in current.select(sel or ""):
-                    elem.decompose()
-            elif t == "find":
-                nm = step.get("name")
-                attrs = step.get("attrs") or {}
-                current = current.find(nm, attrs=attrs)
-            elif t == "find_all":
-                nm = step.get("name")
-                attrs = step.get("attrs") or {}
-                lst = current.find_all(nm, attrs=attrs, limit=step.get("limit"))
-                idx = step.get("index")
-                current = lst[idx] if idx is not None and idx < len(lst) else lst
-            elif t == "text":
-                if isinstance(current, list):
-                    current = [elem.get_text() for elem in current]
-                elif isinstance(current, Tag):
-                    current = current.get_text()
-            elif t == "strip":
-                chars = step.get("chars")
-                if isinstance(current, list):
-                    current = [c.strip(chars) for c in current]
-                elif isinstance(current, str):
-                    current = current.strip(chars)
-            elif t == "regex":
-                txt = str(current or "")
-                pat = step.get("pattern") or ""
-                flags = step.get("flags")
-                flags = flags if flags is not None else 0
-                match = re.compile(pat, flags).search(txt)
-                if match:
-                    template = step.get("template")
-                    if template:
-                        s = template
-                        for i in range(1, len(match.groups()) + 1):
-                            s = s.replace(f"${i}", match.group(i) or "")
-                        current = s
-                    else:
-                        grp = step.get("group")
-                        grp = grp if grp is not None else 0
-                        current = match.group(grp)
-                else:
-                    current = ""
-            elif t == "replace":
-                old = step.get("old")
-                old = old if old is not None else ""
-                new = step.get("new")
-                new = new if new is not None else ""
-                cnt = step.get("count")
-                cnt = cnt if cnt is not None else -1
-                if isinstance(current, list):
-                    current = [c.replace(old, new, cnt) for c in current]
-                elif isinstance(current, str):
-                    current = current.replace(old, new, cnt)
-            elif t == "split":
-                sep = step.get("sep", "")
-                idx = step.get("index")
-                idx = idx if idx is not None else 0
-                parts = (current or "").split(sep)
-                current = parts[idx] if idx < len(parts) else ""
-            elif t == "join":
-                sep = step.get("sep")
-                sep = sep if sep is not None else ""
-                if isinstance(current, list):
-                    current = sep.join(current)
-            elif t == "attr":
-                name = step.get("attr") or ""
-                if isinstance(current, list):
-                    current = [elem.get(name, "") for elem in current]
-                elif isinstance(current, Tag):
-                    current = current.get(name, "")
-            else:
-                raise ValueError(f"Unsupported step type: {t}")
-            if current is None:
-                break
-        # Final normalization
-        if isinstance(current, list):
-            flat = list(flatten_list(current))
-            texts = [to_text(x) for x in flat if x is not None]
-            return "\n".join(texts)
-        if isinstance(current, Tag):
-            return str(current.get_text().strip())
-        return str(current or "").strip()
-    def extract_mixed_volumes(self, volume_rule: VolumesRules) -> list[dict[str, Any]]:
-        """
-        Special mode: mixed <volume> and <chapter> under same parent.
-        (e.g., dt / dd pattern in BiQuGe)
-        """
-        list_selector = volume_rule.get("list_selector")
-        volume_selector = volume_rule.get("volume_selector")
-        volume_name_steps = volume_rule.get("volume_name_steps")
-        chapter_selector = volume_rule["chapter_selector"]
-        chapter_steps_list = volume_rule["chapter_steps"]
-        if not (
-            list_selector and volume_selector and chapter_selector and volume_name_steps
-        ):
-            raise ValueError(
-                "volume_mode='mixed' 时, 必须提供 list_selector, volume_selector, "
-                "chapter_selector 和 volume_name_steps"
-            )
-        volumes: list[dict[str, Any]] = []
-        current_volume: dict[str, Any] | None = None
-        if not chapter_steps_list:
-            chapter_steps_list = []
-        chapter_info_steps = {item["key"]: item["steps"] for item in chapter_steps_list}
-        list_area = self._soup.select_one(list_selector)
-        if not list_area:
-            raise ValueError(f"找不到 list_selector: {list_selector}")
-        for elem in list_area.find_all(
-            [volume_selector, chapter_selector], recursive=True
-        ):
-            if not isinstance(elem, Tag):
-                continue
-            if elem.name == volume_selector:
-                extractor = HTMLExtractor(str(elem))
-                volume_name = extractor.extract_field(volume_name_steps)
-                current_volume = {"volume_name": volume_name, "chapters": []}
-                volumes.append(current_volume)
-            elif elem.name == chapter_selector and current_volume is not None:
-                chap_extractor = HTMLExtractor(str(elem))
-                chapter_data = {}
-                for field, steps in chapter_info_steps.items():
-                    chapter_data[field] = chap_extractor.extract_field(steps)
-                current_volume["chapters"].append(chapter_data)
-        return volumes
-    def extract_volume_blocks(self, volume_rule: VolumesRules) -> list[dict[str, Any]]:
-        volume_selector = volume_rule.get("volume_selector")
-        volume_name_steps = volume_rule.get("volume_name_steps")
-        chapter_selector = volume_rule["chapter_selector"]
-        chapter_steps_list = volume_rule["chapter_steps"]
-        if not (volume_selector and volume_name_steps):
-            raise ValueError(
-                "has_volume=True 时, 必须提供 volume_selector 和 volume_name_steps"
-            )
-        volumes = []
-        chapter_info_steps = {item["key"]: item["steps"] for item in chapter_steps_list}
-        for vol in self._soup.select(volume_selector):
-            extractor = HTMLExtractor(str(vol))
-            volume_name = extractor.extract_field(volume_name_steps)
-            chapters = []
-            for chap in vol.select(chapter_selector):
-                chap_extractor = HTMLExtractor(str(chap))
-                chapter_data = {}
-                for field, steps in chapter_info_steps.items():
-                    chapter_data[field] = chap_extractor.extract_field(steps)
-                chapters.append(chapter_data)
-            volumes.append({"volume_name": volume_name, "chapters": chapters})
-        return volumes
-    def extract_flat_chapters(self, volume_rule: VolumesRules) -> list[dict[str, Any]]:
-        chapter_selector = volume_rule["chapter_selector"]
-        chapter_steps_list = volume_rule["chapter_steps"]
-        volume_selector = volume_rule.get("volume_selector")
-        volumes = []
-        chapter_info_steps = {item["key"]: item["steps"] for item in chapter_steps_list}
-        if volume_selector:
-            candidates = self._soup.select(volume_selector)
-        else:
-            candidates = [self._soup]
-        all_chapters = []
-        for area in candidates:
-            for chap in area.select(chapter_selector):
-                chap_extractor = HTMLExtractor(str(chap))
-                chapter_data = {}
-                for field, steps in chapter_info_steps.items():
-                    chapter_data[field] = chap_extractor.extract_field(steps)
-                all_chapters.append(chapter_data)
-        volumes.append({"volume_name": "未分卷", "chapters": all_chapters})
-        return volumes
-    def extract_volumes_structure(
-        self, volume_rule: VolumesRules
-    ) -> list[dict[str, Any]]:
-        volume_mode = volume_rule.get("volume_mode", "normal")
-        if volume_mode == "mixed":
-            return self.extract_mixed_volumes(volume_rule)
-        if volume_rule.get("has_volume", True):
-            return self.extract_volume_blocks(volume_rule)
-        else:
-            return self.extract_flat_chapters(volume_rule)

novel_downloader/core/parsers/common/main_parser.py DELETED Viewed

@@ -1,106 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.common.main_parser
-------------------------------------------------
-This package provides parsing components for handling
-Common pages.
-"""
-from typing import Any
-from novel_downloader.core.parsers.base import BaseParser
-from novel_downloader.models import (
-    ChapterDict,
-    ParserConfig,
-    SiteRules,
-)
-# from .helper import HTMLExtractor
-class CommonParser(BaseParser):
-    """
-    CommonParser extends BaseParser to support site-specific parsing rules.
-    It accepts additional site information and site-specific rules during initialization
-    """
-    def __init__(self, config: ParserConfig, site: str, site_rule: SiteRules):
-        """
-        Initialize the parser with configuration, site name, and site-specific rules.
-        :param config: ParserConfig object controlling parsing behavior.
-        :param site: Name of the site this parser is targeting.
-        :param site_rule: SiteRules object containing parsing rules for the site.
-        """
-        super().__init__(config)
-        self._site = site
-        self._site_rule = site_rule
-    def parse_book_info(
-        self,
-        html_list: list[str],
-        **kwargs: Any,
-    ) -> dict[str, Any]:
-        """
-        Parse a book info page and extract metadata and chapter structure.
-        :param html_list: Raw HTML of the book info page.
-        :return: Parsed metadata and chapter structure as a dictionary.
-        """
-        if not html_list:
-            return {}
-        # extractor = HTMLExtractor(html_list[0])
-        # rules = self._site_rule["book_info"]
-        # return extractor.extract_book_info(rules)
-        return {}
-    def parse_chapter(
-        self,
-        html_list: list[str],
-        chapter_id: str,
-        **kwargs: Any,
-    ) -> ChapterDict | None:
-        """
-        Parse a single chapter page and extract clean text or simplified HTML.
-        :param html_list: Raw HTML of the chapter page.
-        :param chapter_id: Identifier of the chapter being parsed.
-        :return: Cleaned chapter content as plain text or minimal HTML.
-        """
-        if not html_list:
-            return None
-        # extractor = HTMLExtractor(html_list[0])
-        # chapter_rules = self._site_rule["chapter"]
-        # # 必须有正文内容
-        # content_steps = chapter_rules.get("content")
-        # if not content_steps:
-        #     raise ValueError(f"No chapter content steps for site: {self._site}")
-        # title_steps = chapter_rules.get("title")
-        # title = extractor.extract_field(title_steps["steps"]) if title_steps else ""
-        # content = extractor.extract_field(content_steps["steps"])
-        # if not content:
-        #     return None
-        # return {
-        #     "id": chapter_id,
-        #     "title": title or "Untitled",
-        #     "content": content,
-        #     "extra": {
-        #         "site": self._site,
-        #     },
-        # }
-        return None
-    @property
-    def site(self) -> str:
-        """Return the site name."""
-        return self._site
-    @property
-    def site_rule(self) -> SiteRules:
-        """Return the site-specific rules."""
-        return self._site_rule

novel_downloader/core/parsers/esjzone/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.esjzone
--------------------------------------
-"""
-from .main_parser import EsjzoneParser
-__all__ = ["EsjzoneParser"]

novel_downloader/core/parsers/linovelib/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.linovelib
----------------------------------------
-"""
-from .main_parser import LinovelibParser
-__all__ = ["LinovelibParser"]

novel_downloader/core/parsers/qianbi/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.qianbi
-------------------------------------
-"""
-from .main_parser import QianbiParser
-__all__ = ["QianbiParser"]

novel_downloader/core/parsers/sfacg/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.sfacg
------------------------------------
-"""
-from .main_parser import SfacgParser
-__all__ = ["SfacgParser"]

novel_downloader/core/parsers/yamibo/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.core.parsers.yamibo
-------------------------------------
-"""
-from .main_parser import YamiboParser
-__all__ = ["YamiboParser"]

novel_downloader/models/browser.py DELETED Viewed

@@ -1,21 +0,0 @@
-#!/usr/bin/env python3
-"""
-novel_downloader.models.browser
--------------------------------
-"""
-from pathlib import Path
-from typing import TypedDict
-from playwright.async_api import ViewportSize
-class NewContextOptions(TypedDict, total=False):
-    user_agent: str
-    locale: str
-    storage_state: Path
-    viewport: ViewportSize
-    java_script_enabled: bool
-    ignore_https_errors: bool
-    extra_http_headers: dict[str, str]

novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl