PyPI - novel-downloader - Versions diffs - 1.3.3__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

novel-downloader 1.3.3__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (211) hide show

novel_downloader/__init__.py +1 -1
novel_downloader/cli/clean.py +97 -78
novel_downloader/cli/config.py +177 -0
novel_downloader/cli/download.py +132 -87
novel_downloader/cli/export.py +77 -0
novel_downloader/cli/main.py +21 -28
novel_downloader/config/__init__.py +1 -25
novel_downloader/config/adapter.py +32 -31
novel_downloader/config/loader.py +3 -3
novel_downloader/config/site_rules.py +1 -2
novel_downloader/core/__init__.py +3 -6
novel_downloader/core/downloaders/__init__.py +10 -13
novel_downloader/core/downloaders/base.py +233 -0
novel_downloader/core/downloaders/biquge.py +27 -0
novel_downloader/core/downloaders/common.py +414 -0
novel_downloader/core/downloaders/esjzone.py +27 -0
novel_downloader/core/downloaders/linovelib.py +27 -0
novel_downloader/core/downloaders/qianbi.py +27 -0
novel_downloader/core/downloaders/qidian.py +352 -0
novel_downloader/core/downloaders/sfacg.py +27 -0
novel_downloader/core/downloaders/yamibo.py +27 -0
novel_downloader/core/exporters/__init__.py +37 -0
novel_downloader/core/{savers → exporters}/base.py +73 -39
novel_downloader/core/exporters/biquge.py +25 -0
novel_downloader/core/exporters/common/__init__.py +12 -0
novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
novel_downloader/core/exporters/esjzone.py +25 -0
novel_downloader/core/exporters/linovelib/__init__.py +10 -0
novel_downloader/core/exporters/linovelib/epub.py +449 -0
novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
novel_downloader/core/exporters/linovelib/txt.py +129 -0
novel_downloader/core/exporters/qianbi.py +25 -0
novel_downloader/core/{savers → exporters}/qidian.py +8 -8
novel_downloader/core/exporters/sfacg.py +25 -0
novel_downloader/core/exporters/yamibo.py +25 -0
novel_downloader/core/factory/__init__.py +5 -17
novel_downloader/core/factory/downloader.py +24 -126
novel_downloader/core/factory/exporter.py +58 -0
novel_downloader/core/factory/fetcher.py +96 -0
novel_downloader/core/factory/parser.py +17 -12
novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
novel_downloader/core/fetchers/base/browser.py +383 -0
novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
novel_downloader/core/fetchers/base/session.py +419 -0
novel_downloader/core/fetchers/biquge/__init__.py +14 -0
novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
novel_downloader/core/fetchers/common/__init__.py +14 -0
novel_downloader/core/fetchers/common/browser.py +79 -0
novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
novel_downloader/core/fetchers/esjzone/browser.py +202 -0
novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
novel_downloader/core/fetchers/linovelib/browser.py +193 -0
novel_downloader/core/fetchers/linovelib/session.py +193 -0
novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
novel_downloader/core/fetchers/qidian/__init__.py +14 -0
novel_downloader/core/fetchers/qidian/browser.py +266 -0
novel_downloader/core/fetchers/qidian/session.py +326 -0
novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
novel_downloader/core/fetchers/sfacg/browser.py +189 -0
novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
novel_downloader/core/fetchers/yamibo/browser.py +229 -0
novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
novel_downloader/core/interfaces/__init__.py +8 -12
novel_downloader/core/interfaces/downloader.py +54 -0
novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
novel_downloader/core/interfaces/fetcher.py +162 -0
novel_downloader/core/interfaces/parser.py +6 -7
novel_downloader/core/parsers/__init__.py +5 -6
novel_downloader/core/parsers/base.py +9 -13
novel_downloader/core/parsers/biquge/main_parser.py +12 -13
novel_downloader/core/parsers/common/helper.py +3 -3
novel_downloader/core/parsers/common/main_parser.py +39 -34
novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
novel_downloader/core/parsers/linovelib/__init__.py +10 -0
novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
novel_downloader/core/parsers/qidian/__init__.py +2 -11
novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
novel_downloader/locales/en.json +18 -2
novel_downloader/locales/zh.json +18 -2
novel_downloader/models/__init__.py +64 -0
novel_downloader/models/browser.py +21 -0
novel_downloader/models/chapter.py +25 -0
novel_downloader/models/config.py +100 -0
novel_downloader/models/login.py +20 -0
novel_downloader/models/site_rules.py +99 -0
novel_downloader/models/tasks.py +33 -0
novel_downloader/models/types.py +15 -0
novel_downloader/resources/config/settings.toml +31 -25
novel_downloader/resources/json/linovelib_font_map.json +3573 -0
novel_downloader/tui/__init__.py +7 -0
novel_downloader/tui/app.py +32 -0
novel_downloader/tui/main.py +17 -0
novel_downloader/tui/screens/__init__.py +14 -0
novel_downloader/tui/screens/home.py +191 -0
novel_downloader/tui/screens/login.py +74 -0
novel_downloader/tui/styles/home_layout.tcss +79 -0
novel_downloader/tui/widgets/richlog_handler.py +24 -0
novel_downloader/utils/__init__.py +6 -0
novel_downloader/utils/chapter_storage.py +25 -38
novel_downloader/utils/constants.py +11 -5
novel_downloader/utils/cookies.py +66 -0
novel_downloader/utils/crypto_utils.py +1 -74
novel_downloader/utils/fontocr/ocr_v1.py +2 -1
novel_downloader/utils/fontocr/ocr_v2.py +2 -2
novel_downloader/utils/hash_store.py +10 -18
novel_downloader/utils/hash_utils.py +3 -2
novel_downloader/utils/logger.py +2 -3
novel_downloader/utils/network.py +2 -1
novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
novel_downloader/utils/text_utils/font_mapping.py +1 -1
novel_downloader/utils/text_utils/text_cleaning.py +1 -1
novel_downloader/utils/time_utils/datetime_utils.py +3 -3
novel_downloader/utils/time_utils/sleep_utils.py +1 -1
{novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/METADATA +69 -35
novel_downloader-1.4.1.dist-info/RECORD +170 -0
{novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/WHEEL +1 -1
{novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/entry_points.txt +1 -0
novel_downloader/cli/interactive.py +0 -66
novel_downloader/cli/settings.py +0 -177
novel_downloader/config/models.py +0 -187
novel_downloader/core/downloaders/base/__init__.py +0 -14
novel_downloader/core/downloaders/base/base_async.py +0 -153
novel_downloader/core/downloaders/base/base_sync.py +0 -208
novel_downloader/core/downloaders/biquge/__init__.py +0 -14
novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
novel_downloader/core/downloaders/common/__init__.py +0 -14
novel_downloader/core/downloaders/common/common_async.py +0 -210
novel_downloader/core/downloaders/common/common_sync.py +0 -202
novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
novel_downloader/core/downloaders/qidian/__init__.py +0 -10
novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
novel_downloader/core/factory/requester.py +0 -144
novel_downloader/core/factory/saver.py +0 -56
novel_downloader/core/interfaces/async_downloader.py +0 -36
novel_downloader/core/interfaces/async_requester.py +0 -84
novel_downloader/core/interfaces/sync_downloader.py +0 -36
novel_downloader/core/interfaces/sync_requester.py +0 -82
novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
novel_downloader/core/requesters/base/async_session.py +0 -410
novel_downloader/core/requesters/base/browser.py +0 -337
novel_downloader/core/requesters/base/session.py +0 -378
novel_downloader/core/requesters/biquge/__init__.py +0 -14
novel_downloader/core/requesters/common/__init__.py +0 -17
novel_downloader/core/requesters/common/session.py +0 -113
novel_downloader/core/requesters/esjzone/__init__.py +0 -13
novel_downloader/core/requesters/esjzone/session.py +0 -235
novel_downloader/core/requesters/qianbi/__init__.py +0 -13
novel_downloader/core/requesters/qidian/__init__.py +0 -21
novel_downloader/core/requesters/qidian/broswer.py +0 -307
novel_downloader/core/requesters/qidian/session.py +0 -290
novel_downloader/core/requesters/sfacg/__init__.py +0 -13
novel_downloader/core/requesters/sfacg/session.py +0 -242
novel_downloader/core/requesters/yamibo/__init__.py +0 -13
novel_downloader/core/requesters/yamibo/session.py +0 -237
novel_downloader/core/savers/__init__.py +0 -34
novel_downloader/core/savers/biquge.py +0 -25
novel_downloader/core/savers/common/__init__.py +0 -12
novel_downloader/core/savers/esjzone.py +0 -25
novel_downloader/core/savers/qianbi.py +0 -25
novel_downloader/core/savers/sfacg.py +0 -25
novel_downloader/core/savers/yamibo.py +0 -25
novel_downloader/resources/config/rules.toml +0 -196
novel_downloader-1.3.3.dist-info/RECORD +0 -166
{novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/top_level.txt +0 -0

novel_downloader/core/interfaces/downloader.py ADDED Viewed

@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.interfaces.downloader
+-------------------------------------------
+This module defines the DownloaderProtocol, a structural interface
+that outlines the expected behavior of any downloader class.
+"""
+from collections.abc import Awaitable, Callable
+from typing import Any, Protocol, runtime_checkable
+@runtime_checkable
+class DownloaderProtocol(Protocol):
+    """
+    Protocol for fully-asynchronous downloader classes.
+    Defines the expected interface for any downloader implementation,
+    including both batch and single book downloads,
+    as well as optional per-chapter progress hooks.
+    """
+    async def download(
+        self,
+        book_id: str,
+        *,
+        progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Download logic for a single book.
+        :param book_id: The identifier of the book.
+        :param progress_hook: (optional) Called after each chapter;
+                                args: completed_count, total_count.
+        """
+        ...
+    async def download_many(
+        self,
+        book_ids: list[str],
+        *,
+        progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Batch download entry point.
+        :param book_ids: List of book IDs to download.
+        :param progress_hook: (optional) Called after each chapter;
+                                args: completed_count, total_count.
+        """
+        ...

novel_downloader/core/interfaces/{saver.py → exporter.py} RENAMED Viewed

@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 """
-novel_downloader.core.interfaces.saver
---------------------------------------
+novel_downloader.core.interfaces.exporter
+-----------------------------------------
-Defines the SaverProtocol interface for persisting completed books in
+Defines the ExporterProtocol interface for persisting completed books in
 TXT, EPUB, Markdown, and PDF formats.
 """
@@ -11,23 +11,23 @@ from typing import Protocol, runtime_checkable
 @runtime_checkable
-class SaverProtocol(Protocol):
+class ExporterProtocol(Protocol):
     """
-    A saver must implement a method to persist a completed book as plain text.
+    An exporter must implement a method to persist a completed book as plain text.
-    It may also optionally implement an EPUB (or other format) saver.
+    It may also optionally implement an EPUB (or other format) exporter.
     """
-    def save(self, book_id: str) -> None:
+    def export(self, book_id: str) -> None:
         """
-        Save the book in the formats specified in config.
+        Export the book in the formats specified in config.
         If a method is not implemented or fails, log the error and continue.
         :param book_id: The book identifier (used for filename, lookup, etc.)
         """
         ...
-    def save_as_txt(self, book_id: str) -> None:
+    def export_as_txt(self, book_id: str) -> None:
         """
         Persist the assembled book as a .txt file.
@@ -35,7 +35,7 @@ class SaverProtocol(Protocol):
         """
         ...
-    def save_as_epub(self, book_id: str) -> None:
+    def export_as_epub(self, book_id: str) -> None:
         """
         Optional: Persist the assembled book as an .epub file.
@@ -43,7 +43,7 @@ class SaverProtocol(Protocol):
         """
         ...
-    def save_as_md(self, book_id: str) -> None:
+    def export_as_md(self, book_id: str) -> None:
         """
         Optional: Persist the assembled book as a Markdown (.md) file.
@@ -51,7 +51,7 @@ class SaverProtocol(Protocol):
         """
         ...
-    def save_as_pdf(self, book_id: str) -> None:
+    def export_as_pdf(self, book_id: str) -> None:
         """
         Optional: Persist the assembled book as a PDF file.

novel_downloader/core/interfaces/fetcher.py ADDED Viewed

@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.interfaces.fetcher
+----------------------------------------
+Defines the async FetcherProtocol interface for fetching raw HTML or JSON
+for book info pages and individual chapters, and for managing the request lifecycle.
+"""
+import types
+from typing import Any, Protocol, Self, runtime_checkable
+from novel_downloader.models import LoginField
+@runtime_checkable
+class FetcherProtocol(Protocol):
+    """
+    An async fetcher must be able to fetch raw HTML/data for:
+      - a book's info page,
+      - a specific chapter page,
+    and manage login/shutdown asynchronously.
+    """
+    async def login(
+        self,
+        username: str = "",
+        password: str = "",
+        cookies: dict[str, str] | None = None,
+        attempt: int = 1,
+        **kwargs: Any,
+    ) -> bool:
+        """
+        Attempt to log in asynchronously.
+        :returns: True if login succeeded.
+        """
+        ...
+    async def get_book_info(
+        self,
+        book_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML (or JSON) of the book info page asynchronously.
+        :param book_id: The book identifier.
+        :return: The page content as a list of strings.
+        """
+        ...
+    async def get_book_chapter(
+        self,
+        book_id: str,
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML (or JSON) of a single chapter asynchronously.
+        :param book_id: The book identifier.
+        :param chapter_id: The chapter identifier.
+        :return: The chapter content as a list of strings.
+        """
+        ...
+    async def get_bookcase(
+        self,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Optional: Retrieve the HTML content of the authenticated
+        user's bookcase page asynchronously.
+        :return: The HTML markup of the bookcase page, as a list of strings.
+        """
+        ...
+    async def fetch(
+        self,
+        url: str,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Perform a generic HTTP request and return the response body as text.
+        :param url: The URL to request.
+        :return: The response content as a string (HTML or JSON or plain text).
+        """
+        ...
+    async def init(
+        self,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Perform async initialization, such as
+        launching a browser or creating a session.
+        This should be called before using any other method
+        if initialization is required.
+        """
+        ...
+    async def close(self) -> None:
+        """
+        Shutdown and clean up any resources.
+        """
+        ...
+    async def load_state(self) -> bool:
+        """
+        Restore session state from a persistent storage,
+        allowing the requester to resume a previous authenticated session.
+        :return: True if the session state was successfully loaded and applied.
+        """
+        ...
+    async def save_state(self) -> bool:
+        """
+        Persist the current session state to a file
+        or other storage, so that it can be restored in future sessions.
+        :return: True if the session state was successfully saved.
+        """
+        ...
+    async def set_interactive_mode(self, enable: bool) -> bool:
+        """
+        Enable or disable interactive mode for manual login.
+        :param enable: True to enable, False to disable interactive mode.
+        :return: True if operation or login check succeeded, False otherwise.
+        """
+        ...
+    @property
+    def requester_type(self) -> str:
+        ...
+    @property
+    def is_logged_in(self) -> bool:
+        """
+        Indicates whether the requester is currently authenticated.
+        """
+        ...
+    @property
+    def login_fields(self) -> list[LoginField]:
+        ...
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        tb: types.TracebackType | None,
+    ) -> None:
+        ...
+    async def __aenter__(self) -> Self:
+        ...

novel_downloader/core/interfaces/parser.py CHANGED Viewed

@@ -9,7 +9,7 @@ parsing individual chapter content, and setting parser context via book_id.
 from typing import Any, Protocol, runtime_checkable
-from novel_downloader.utils.chapter_storage import ChapterDict
+from novel_downloader.models import ChapterDict
 @runtime_checkable
@@ -17,33 +17,32 @@ class ParserProtocol(Protocol):
     """
     A parser must be able to:
       - extract book metadata from an HTML string,
-      - extract a single chapter's text from an HTML string,
-      - accept a book_id context for multi-step workflows.
+      - extract a single chapter's text from an HTML string
     """
     def parse_book_info(
         self,
-        html_str: list[str],
+        html_list: list[str],
         **kwargs: Any,
     ) -> dict[str, Any]:
         """
         Parse and return a dictionary of book information from the raw HTML.
-        :param html_str: The HTML of a book's info page.
+        :param html_list: The HTML list of a book's info pages.
         :return: A dict containing metadata like title, author, chapters list, etc.
         """
         ...
     def parse_chapter(
         self,
-        html_str: list[str],
+        html_list: list[str],
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
         """
         Parse and return the text content of one chapter.
-        :param html_str: The HTML of the chapter page.
+        :param html_list: The HTML list of the chapter pages.
         :param chapter_id: Identifier of the chapter being parsed.
         :return: The chapter's text.
         """

novel_downloader/core/parsers/__init__.py CHANGED Viewed

@@ -9,6 +9,7 @@ for the novel_downloader framework.
 Modules:
 - biquge (笔趣阁)
 - esjzone (ESJ Zone)
+- linovelib (哔哩轻小说)
 - qianbi (铅笔小说)
 - qidian (起点中文网)
 - sfacg (SF轻小说)
@@ -19,11 +20,9 @@ Modules:
 from .biquge import BiqugeParser
 from .common import CommonParser
 from .esjzone import EsjzoneParser
+from .linovelib import LinovelibParser
 from .qianbi import QianbiParser
-from .qidian import (
-    QidianBrowserParser,
-    QidianSessionParser,
-)
+from .qidian import QidianParser
 from .sfacg import SfacgParser
 from .yamibo import YamiboParser
@@ -31,9 +30,9 @@ __all__ = [
     "BiqugeParser",
     "CommonParser",
     "EsjzoneParser",
+    "LinovelibParser",
     "QianbiParser",
-    "QidianBrowserParser",
-    "QidianSessionParser",
+    "QidianParser",
     "SfacgParser",
     "YamiboParser",
 ]

novel_downloader/core/parsers/base.py CHANGED Viewed

@@ -17,9 +17,8 @@ import abc
 from pathlib import Path
 from typing import Any
-from novel_downloader.config import ParserConfig
 from novel_downloader.core.interfaces import ParserProtocol
-from novel_downloader.utils.chapter_storage import ChapterDict
+from novel_downloader.models import ChapterDict, ParserConfig
 class BaseParser(ParserProtocol, abc.ABC):
@@ -51,33 +50,30 @@ class BaseParser(ParserProtocol, abc.ABC):
     @abc.abstractmethod
     def parse_book_info(
         self,
-        html_str: list[str],
+        html_list: list[str],
         **kwargs: Any,
     ) -> dict[str, Any]:
         """
-        Parse a book info page and extract metadata and chapter structure.
+        Parse and return a dictionary of book information from the raw HTML.
-        Depending on the site structure, the return dict may include a
-        flat `chapters` list or nested `volumes` with chapter groups.
-        :param html_str: Raw HTML of the book info page.
-        :return: Parsed metadata and chapter structure as a dictionary.
+        :param html_list: The HTML list of a book's info pages.
+        :return: A dict containing metadata like title, author, chapters list, etc.
         """
         ...
     @abc.abstractmethod
     def parse_chapter(
         self,
-        html_str: list[str],
+        html_list: list[str],
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
         """
-        Parse a single chapter page and extract clean text or simplified HTML.
+        Parse and return the text content of one chapter.
-        :param html_str: Raw HTML of the chapter page.
+        :param html_list: The HTML list of the chapter pages.
         :param chapter_id: Identifier of the chapter being parsed.
-        :return: Cleaned chapter content as plain text or minimal HTML.
+        :return: The parsed chapter as a ChapterDict, or None if no chapter could be produced.
         """
         ...

novel_downloader/core/parsers/biquge/main_parser.py CHANGED Viewed

@@ -8,11 +8,10 @@ novel_downloader.core.parsers.biquge.main_parser
 import re
 from typing import Any
-from lxml import etree
-from lxml.etree import _Element
+from lxml import html
 from novel_downloader.core.parsers.base import BaseParser
-from novel_downloader.utils.chapter_storage import ChapterDict
+from novel_downloader.models import ChapterDict
 class BiqugeParser(BaseParser):
@@ -20,21 +19,21 @@ class BiqugeParser(BaseParser):
     def parse_book_info(
         self,
-        html_str: list[str],
+        html_list: list[str],
         **kwargs: Any,
     ) -> dict[str, Any]:
         """
         Parse a book info page and extract metadata and chapter structure.
-        :param html: Raw HTML of the book info page.
+        :param html_list: Raw HTML of the book info page.
         :return: Parsed metadata and chapter structure as a dictionary.
         """
-        if not html_str:
+        if not html_list:
             return {}
-        tree = etree.HTML(html_str[0])
+        tree = html.fromstring(html_list[0])
         result: dict[str, Any] = {}
-        def extract_text(elem: _Element | None) -> str:
+        def extract_text(elem: html.HtmlElement | None) -> str:
             if elem is None:
                 return ""
             return "".join(elem.itertext(tag=None)).strip()
@@ -79,7 +78,7 @@ class BiqugeParser(BaseParser):
                 text = "".join(elem.itertext()).strip()
                 in_main_volume = "正文" in text
             elif in_main_volume and elem.tag == "dd":
-                a: list[_Element] = elem.xpath("./a")
+                a: list[html.HtmlElement] = elem.xpath("./a")
                 if a:
                     title = "".join(a[0].itertext(tag=None)).strip()
                     url = a[0].get("href", "").strip()
@@ -96,20 +95,20 @@ class BiqugeParser(BaseParser):
     def parse_chapter(
         self,
-        html_str: list[str],
+        html_list: list[str],
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
         """
         Parse a single chapter page and extract clean text or simplified HTML.
-        :param html: Raw HTML of the chapter page.
+        :param html_list: Raw HTML of the chapter page.
         :param chapter_id: Identifier of the chapter being parsed.
         :return: Cleaned chapter content as plain text or minimal HTML.
         """
-        if not html_str:
+        if not html_list:
             return None
-        tree = etree.HTML(html_str[0], parser=None)
+        tree = html.fromstring(html_list[0], parser=None)
         # 提取标题
         title_elem = tree.xpath('//div[@class="bookname"]/h1')

novel_downloader/core/parsers/common/helper.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-novel_downloader.core.parsers.common.helpers
---------------------------------------------
+novel_downloader.core.parsers.common.helper
+-------------------------------------------
 Shared utility functions for parsing Common pages.
 """
@@ -13,7 +13,7 @@ from typing import Any, cast
 from bs4 import BeautifulSoup, Tag
-from novel_downloader.config import (
+from novel_downloader.models import (
     BookInfoRules,
     FieldRules,
     RuleStep,

novel_downloader/core/parsers/common/main_parser.py CHANGED Viewed

@@ -9,11 +9,14 @@ Common pages.
 from typing import Any
-from novel_downloader.config import ParserConfig, SiteRules
 from novel_downloader.core.parsers.base import BaseParser
-from novel_downloader.utils.chapter_storage import ChapterDict
+from novel_downloader.models import (
+    ChapterDict,
+    ParserConfig,
+    SiteRules,
+)
-from .helper import HTMLExtractor
+# from .helper import HTMLExtractor
 class CommonParser(BaseParser):
@@ -37,58 +40,60 @@ class CommonParser(BaseParser):
     def parse_book_info(
         self,
-        html_str: list[str],
+        html_list: list[str],
         **kwargs: Any,
     ) -> dict[str, Any]:
         """
         Parse a book info page and extract metadata and chapter structure.
-        :param html: Raw HTML of the book info page.
+        :param html_list: Raw HTML of the book info page.
         :return: Parsed metadata and chapter structure as a dictionary.
         """
-        if not html_str:
+        if not html_list:
             return {}
-        extractor = HTMLExtractor(html_str[0])
-        rules = self._site_rule["book_info"]
-        return extractor.extract_book_info(rules)
+        # extractor = HTMLExtractor(html_list[0])
+        # rules = self._site_rule["book_info"]
+        # return extractor.extract_book_info(rules)
+        return {}
     def parse_chapter(
         self,
-        html_str: list[str],
+        html_list: list[str],
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
         """
         Parse a single chapter page and extract clean text or simplified HTML.
-        :param html: Raw HTML of the chapter page.
+        :param html_list: Raw HTML of the chapter page.
         :param chapter_id: Identifier of the chapter being parsed.
         :return: Cleaned chapter content as plain text or minimal HTML.
         """
-        if not html_str:
+        if not html_list:
             return None
-        extractor = HTMLExtractor(html_str[0])
-        chapter_rules = self._site_rule["chapter"]
-        # 必须有正文内容
-        content_steps = chapter_rules.get("content")
-        if not content_steps:
-            raise ValueError(f"No chapter content steps defined for site: {self._site}")
-        title_steps = chapter_rules.get("title")
-        title = extractor.extract_field(title_steps["steps"]) if title_steps else ""
-        content = extractor.extract_field(content_steps["steps"])
-        if not content:
-            return None
-        return {
-            "id": chapter_id,
-            "title": title or "Untitled",
-            "content": content,
-            "extra": {
-                "site": self._site,
-            },
-        }
+        # extractor = HTMLExtractor(html_list[0])
+        # chapter_rules = self._site_rule["chapter"]
+        # # 必须有正文内容
+        # content_steps = chapter_rules.get("content")
+        # if not content_steps:
+        #     raise ValueError(f"No chapter content steps for site: {self._site}")
+        # title_steps = chapter_rules.get("title")
+        # title = extractor.extract_field(title_steps["steps"]) if title_steps else ""
+        # content = extractor.extract_field(content_steps["steps"])
+        # if not content:
+        #     return None
+        # return {
+        #     "id": chapter_id,
+        #     "title": title or "Untitled",
+        #     "content": content,
+        #     "extra": {
+        #         "site": self._site,
+        #     },
+        # }
+        return None
     @property
     def site(self) -> str:

novel-downloader 1.3.3__py3-none-any.whl → 1.4.1__py3-none-any.whl

novel-downloader 1.3.3__py3-none-any.whl → 1.4.1__py3-none-any.whl