PyPI - novel-downloader - Versions diffs - 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl - Mend

novel-downloader 1.5.0 (py3-none-any.whl) → 2.0.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (248)

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +1 -3
novel_downloader/cli/clean.py +21 -88
novel_downloader/cli/config.py +26 -21
novel_downloader/cli/download.py +79 -66
novel_downloader/cli/export.py +17 -21
novel_downloader/cli/main.py +1 -1
novel_downloader/cli/search.py +62 -65
novel_downloader/cli/ui.py +156 -0
novel_downloader/config/__init__.py +8 -5
novel_downloader/config/adapter.py +206 -209
novel_downloader/config/{loader.py → file_io.py} +53 -26
novel_downloader/core/__init__.py +5 -5
novel_downloader/core/archived/deqixs/fetcher.py +115 -0
novel_downloader/core/archived/deqixs/parser.py +132 -0
novel_downloader/core/archived/deqixs/searcher.py +89 -0
novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
novel_downloader/core/archived/wanbengo/searcher.py +98 -0
novel_downloader/core/archived/xshbook/searcher.py +93 -0
novel_downloader/core/downloaders/__init__.py +3 -24
novel_downloader/core/downloaders/base.py +49 -23
novel_downloader/core/downloaders/common.py +191 -137
novel_downloader/core/downloaders/qianbi.py +187 -146
novel_downloader/core/downloaders/qidian.py +187 -141
novel_downloader/core/downloaders/registry.py +4 -2
novel_downloader/core/downloaders/signals.py +46 -0
novel_downloader/core/exporters/__init__.py +3 -20
novel_downloader/core/exporters/base.py +33 -37
novel_downloader/core/exporters/common/__init__.py +1 -2
novel_downloader/core/exporters/common/epub.py +15 -10
novel_downloader/core/exporters/common/main_exporter.py +19 -12
novel_downloader/core/exporters/common/txt.py +17 -12
novel_downloader/core/exporters/epub_util.py +59 -29
novel_downloader/core/exporters/linovelib/__init__.py +1 -0
novel_downloader/core/exporters/linovelib/epub.py +23 -25
novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
novel_downloader/core/exporters/linovelib/txt.py +20 -14
novel_downloader/core/exporters/qidian.py +2 -8
novel_downloader/core/exporters/registry.py +4 -2
novel_downloader/core/exporters/txt_util.py +7 -7
novel_downloader/core/fetchers/__init__.py +54 -48
novel_downloader/core/fetchers/aaatxt.py +83 -0
novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
novel_downloader/core/fetchers/dxmwx.py +110 -0
novel_downloader/core/fetchers/eightnovel.py +139 -0
novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
novel_downloader/core/fetchers/guidaye.py +85 -0
novel_downloader/core/fetchers/hetushu.py +92 -0
novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
novel_downloader/core/fetchers/ixdzs8.py +113 -0
novel_downloader/core/fetchers/jpxs123.py +101 -0
novel_downloader/core/fetchers/lewenn.py +83 -0
novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
novel_downloader/core/fetchers/piaotia.py +105 -0
novel_downloader/core/fetchers/qbtr.py +101 -0
novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
novel_downloader/core/fetchers/quanben5.py +92 -0
novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
novel_downloader/core/fetchers/registry.py +5 -16
novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
novel_downloader/core/fetchers/shencou.py +106 -0
novel_downloader/core/fetchers/shuhaige.py +84 -0
novel_downloader/core/fetchers/tongrenquan.py +84 -0
novel_downloader/core/fetchers/ttkan.py +95 -0
novel_downloader/core/fetchers/wanbengo.py +83 -0
novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
novel_downloader/core/fetchers/xiguashuwu.py +177 -0
novel_downloader/core/fetchers/xs63b.py +171 -0
novel_downloader/core/fetchers/xshbook.py +85 -0
novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
novel_downloader/core/fetchers/yibige.py +114 -0
novel_downloader/core/interfaces/__init__.py +1 -9
novel_downloader/core/interfaces/downloader.py +6 -2
novel_downloader/core/interfaces/exporter.py +7 -7
novel_downloader/core/interfaces/fetcher.py +6 -19
novel_downloader/core/interfaces/parser.py +7 -8
novel_downloader/core/interfaces/searcher.py +9 -1
novel_downloader/core/parsers/__init__.py +49 -12
novel_downloader/core/parsers/aaatxt.py +132 -0
novel_downloader/core/parsers/b520.py +116 -0
novel_downloader/core/parsers/base.py +64 -12
novel_downloader/core/parsers/biquyuedu.py +133 -0
novel_downloader/core/parsers/dxmwx.py +162 -0
novel_downloader/core/parsers/eightnovel.py +224 -0
novel_downloader/core/parsers/esjzone.py +64 -69
novel_downloader/core/parsers/guidaye.py +128 -0
novel_downloader/core/parsers/hetushu.py +139 -0
novel_downloader/core/parsers/i25zw.py +137 -0
novel_downloader/core/parsers/ixdzs8.py +186 -0
novel_downloader/core/parsers/jpxs123.py +137 -0
novel_downloader/core/parsers/lewenn.py +142 -0
novel_downloader/core/parsers/linovelib.py +48 -64
novel_downloader/core/parsers/piaotia.py +189 -0
novel_downloader/core/parsers/qbtr.py +136 -0
novel_downloader/core/parsers/qianbi.py +48 -50
novel_downloader/core/parsers/qidian/main_parser.py +756 -48
novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
novel_downloader/core/parsers/quanben5.py +103 -0
novel_downloader/core/parsers/registry.py +5 -16
novel_downloader/core/parsers/sfacg.py +38 -45
novel_downloader/core/parsers/shencou.py +215 -0
novel_downloader/core/parsers/shuhaige.py +111 -0
novel_downloader/core/parsers/tongrenquan.py +116 -0
novel_downloader/core/parsers/ttkan.py +132 -0
novel_downloader/core/parsers/wanbengo.py +191 -0
novel_downloader/core/parsers/xiaoshuowu.py +173 -0
novel_downloader/core/parsers/xiguashuwu.py +429 -0
novel_downloader/core/parsers/xs63b.py +161 -0
novel_downloader/core/parsers/xshbook.py +134 -0
novel_downloader/core/parsers/yamibo.py +87 -131
novel_downloader/core/parsers/yibige.py +166 -0
novel_downloader/core/searchers/__init__.py +34 -3
novel_downloader/core/searchers/aaatxt.py +107 -0
novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
novel_downloader/core/searchers/base.py +112 -36
novel_downloader/core/searchers/dxmwx.py +105 -0
novel_downloader/core/searchers/eightnovel.py +84 -0
novel_downloader/core/searchers/esjzone.py +43 -25
novel_downloader/core/searchers/hetushu.py +92 -0
novel_downloader/core/searchers/i25zw.py +93 -0
novel_downloader/core/searchers/ixdzs8.py +107 -0
novel_downloader/core/searchers/jpxs123.py +107 -0
novel_downloader/core/searchers/piaotia.py +100 -0
novel_downloader/core/searchers/qbtr.py +106 -0
novel_downloader/core/searchers/qianbi.py +74 -40
novel_downloader/core/searchers/quanben5.py +144 -0
novel_downloader/core/searchers/registry.py +24 -8
novel_downloader/core/searchers/shuhaige.py +124 -0
novel_downloader/core/searchers/tongrenquan.py +110 -0
novel_downloader/core/searchers/ttkan.py +92 -0
novel_downloader/core/searchers/xiaoshuowu.py +122 -0
novel_downloader/core/searchers/xiguashuwu.py +95 -0
novel_downloader/core/searchers/xs63b.py +104 -0
novel_downloader/locales/en.json +34 -85
novel_downloader/locales/zh.json +35 -86
novel_downloader/models/__init__.py +21 -22
novel_downloader/models/book.py +44 -0
novel_downloader/models/config.py +4 -37
novel_downloader/models/login.py +1 -1
novel_downloader/models/search.py +5 -0
novel_downloader/resources/config/settings.toml +8 -70
novel_downloader/resources/json/xiguashuwu.json +718 -0
novel_downloader/utils/__init__.py +13 -24
novel_downloader/utils/chapter_storage.py +5 -5
novel_downloader/utils/constants.py +4 -31
novel_downloader/utils/cookies.py +38 -35
novel_downloader/utils/crypto_utils/__init__.py +7 -0
novel_downloader/utils/crypto_utils/aes_util.py +90 -0
novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
novel_downloader/utils/crypto_utils/rc4.py +54 -0
novel_downloader/utils/epub/__init__.py +3 -4
novel_downloader/utils/epub/builder.py +6 -6
novel_downloader/utils/epub/constants.py +62 -21
novel_downloader/utils/epub/documents.py +95 -201
novel_downloader/utils/epub/models.py +8 -22
novel_downloader/utils/epub/utils.py +73 -106
novel_downloader/utils/file_utils/__init__.py +2 -23
novel_downloader/utils/file_utils/io.py +53 -188
novel_downloader/utils/file_utils/normalize.py +1 -7
novel_downloader/utils/file_utils/sanitize.py +4 -15
novel_downloader/utils/fontocr/__init__.py +5 -14
novel_downloader/utils/fontocr/core.py +216 -0
novel_downloader/utils/fontocr/loader.py +50 -0
novel_downloader/utils/logger.py +81 -65
novel_downloader/utils/network.py +17 -41
novel_downloader/utils/state.py +4 -90
novel_downloader/utils/text_utils/__init__.py +1 -7
novel_downloader/utils/text_utils/diff_display.py +5 -7
novel_downloader/utils/text_utils/text_cleaner.py +39 -30
novel_downloader/utils/text_utils/truncate_utils.py +3 -14
novel_downloader/utils/time_utils/__init__.py +5 -11
novel_downloader/utils/time_utils/datetime_utils.py +20 -29
novel_downloader/utils/time_utils/sleep_utils.py +55 -49
novel_downloader/web/__init__.py +13 -0
novel_downloader/web/components/__init__.py +11 -0
novel_downloader/web/components/navigation.py +35 -0
novel_downloader/web/main.py +66 -0
novel_downloader/web/pages/__init__.py +17 -0
novel_downloader/web/pages/download.py +78 -0
novel_downloader/web/pages/progress.py +147 -0
novel_downloader/web/pages/search.py +329 -0
novel_downloader/web/services/__init__.py +17 -0
novel_downloader/web/services/client_dialog.py +164 -0
novel_downloader/web/services/cred_broker.py +113 -0
novel_downloader/web/services/cred_models.py +35 -0
novel_downloader/web/services/task_manager.py +264 -0
novel_downloader-2.0.1.dist-info/METADATA +172 -0
novel_downloader-2.0.1.dist-info/RECORD +206 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
novel_downloader/core/downloaders/biquge.py +0 -29
novel_downloader/core/downloaders/esjzone.py +0 -29
novel_downloader/core/downloaders/linovelib.py +0 -29
novel_downloader/core/downloaders/sfacg.py +0 -29
novel_downloader/core/downloaders/yamibo.py +0 -29
novel_downloader/core/exporters/biquge.py +0 -22
novel_downloader/core/exporters/esjzone.py +0 -22
novel_downloader/core/exporters/qianbi.py +0 -22
novel_downloader/core/exporters/sfacg.py +0 -22
novel_downloader/core/exporters/yamibo.py +0 -22
novel_downloader/core/fetchers/base/__init__.py +0 -14
novel_downloader/core/fetchers/base/browser.py +0 -422
novel_downloader/core/fetchers/biquge/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/browser.py +0 -209
novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
novel_downloader/core/fetchers/linovelib/browser.py +0 -198
novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/browser.py +0 -326
novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
novel_downloader/core/fetchers/sfacg/browser.py +0 -194
novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
novel_downloader/core/fetchers/yamibo/browser.py +0 -234
novel_downloader/core/parsers/biquge.py +0 -139
novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
novel_downloader/models/chapter.py +0 -25
novel_downloader/models/types.py +0 -13
novel_downloader/tui/__init__.py +0 -7
novel_downloader/tui/app.py +0 -32
novel_downloader/tui/main.py +0 -17
novel_downloader/tui/screens/__init__.py +0 -14
novel_downloader/tui/screens/home.py +0 -198
novel_downloader/tui/screens/login.py +0 -74
novel_downloader/tui/styles/home_layout.tcss +0 -79
novel_downloader/tui/widgets/richlog_handler.py +0 -24
novel_downloader/utils/cache.py +0 -24
novel_downloader/utils/crypto_utils.py +0 -71
novel_downloader/utils/fontocr/hash_store.py +0 -280
novel_downloader/utils/fontocr/hash_utils.py +0 -103
novel_downloader/utils/fontocr/model_loader.py +0 -69
novel_downloader/utils/fontocr/ocr_v1.py +0 -315
novel_downloader/utils/fontocr/ocr_v2.py +0 -764
novel_downloader/utils/fontocr/ocr_v3.py +0 -744
novel_downloader-1.5.0.dist-info/METADATA +0 -196
novel_downloader-1.5.0.dist-info/RECORD +0 -164
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0

novel_downloader/config/{loader.py → file_io.py} RENAMED Viewed

@@ -1,26 +1,23 @@
 #!/usr/bin/env python3
 """
-novel_downloader.config.loader
-------------------------------
+novel_downloader.config.file_io
+-------------------------------
-Provides functionality to load Toml configuration files into Python
-dictionaries, with robust error handling and fallback support.
+Provides functionality to load Toml configuration files into Python dict
 """
-__all__ = ["load_config"]
 import json
 import logging
 from pathlib import Path
-from typing import Any
+from typing import Any, TypeVar
-from novel_downloader.utils.cache import cached_load_config
 from novel_downloader.utils.constants import SETTING_FILE
+T = TypeVar("T")
 logger = logging.getLogger(__name__)
-def resolve_file_path(
+def _resolve_file_path(
     user_path: str | Path | None,
     local_filename: str | list[str],
     fallback_path: Path,
@@ -29,9 +26,9 @@ def resolve_file_path(
     Resolve the file path to use based on a prioritized lookup order.
     Priority:
-        1. A user-specified path (if provided and exists)
-        2. A file in the current working directory with the given name
-        3. A globally registered fallback path
+      1. A user-specified path (if provided and exists)
+      2. A file in the current working directory with the given name
+      3. A globally registered fallback path
     :param user_path: Optional user-specified file path.
     :param local_filename: File name to check in the current working directory.
@@ -117,7 +114,6 @@ def _load_by_extension(path: Path) -> dict[str, Any]:
         raise ValueError(f"Unsupported config file extension: {ext}")
-@cached_load_config
 def load_config(
     config_path: str | Path | None = None,
 ) -> dict[str, Any]:
@@ -125,9 +121,9 @@ def load_config(
     Load configuration data from a Toml file.
     :param config_path: Optional path to the Toml configuration file.
-    :return:            Parsed configuration as a dict.
+    :return: Parsed configuration as a dict.
     """
-    path = resolve_file_path(
+    path = _resolve_file_path(
         user_path=config_path,
         local_filename=[
             "settings.toml",
@@ -148,6 +144,46 @@ def load_config(
     return {}
+def get_config_value(keys: list[str], default: T) -> T:
+    """
+    Safely retrieve a nested config value.
+    """
+    cur = load_config()
+    for i, k in enumerate(keys):
+        if not isinstance(cur, dict):
+            return default
+        if i == len(keys) - 1:
+            val = cur.get(k, default)
+            return val if isinstance(val, type(default)) else default
+        cur = cur.get(k, {})
+    return default
+def save_config(
+    config: dict[str, Any],
+    output_path: str | Path = SETTING_FILE,
+) -> None:
+    """
+    Save configuration data to disk in JSON format.
+    :param config: Dictionary containing configuration data to save.
+    :param output_path: Destination path to save the config (default: SETTING_FILE).
+    :raises Exception: If writing to the file fails.
+    """
+    output = Path(output_path).expanduser().resolve()
+    output.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        with output.open("w", encoding="utf-8") as f:
+            json.dump(config, f, indent=2, ensure_ascii=False)
+    except Exception as e:
+        logger.error("[config] Failed to write config JSON '%s': %s", output, e)
+        raise
+    logger.info("[config] Configuration successfully saved to JSON: %s", output)
+    return
 def save_config_file(
     source_path: str | Path,
     output_path: str | Path = SETTING_FILE,
@@ -158,9 +194,9 @@ def save_config_file(
     :param source_path: The user-provided TOML file path.
     :param output_path: Destination path to save the config (default: SETTING_FILE).
+    :raises Exception: If writing to the file fails.
     """
     source = Path(source_path).expanduser().resolve()
-    output = Path(output_path).expanduser().resolve()
     if not source.is_file():
         raise FileNotFoundError(f"Source file not found: {source}")
@@ -171,14 +207,5 @@ def save_config_file(
         logger.error("[config] Failed to load config file: %s", e)
         raise ValueError(f"Invalid config file: {source}") from e
-    output.parent.mkdir(parents=True, exist_ok=True)
-    try:
-        with output.open("w", encoding="utf-8") as f:
-            json.dump(data, f, indent=2, ensure_ascii=False)
-    except Exception as e:
-        logger.error("[config] Failed to write config JSON '%s': %s", output, e)
-        raise
-    logger.info("[config] Configuration successfully saved to JSON: %s", output)
+    save_config(data, output_path)
     return

novel_downloader/core/__init__.py CHANGED Viewed

@@ -7,11 +7,11 @@ This package serves as the core layer of the novel_downloader system.
 It provides factory methods for constructing key components required for
 downloading and processing online novel content, including:
-- Downloader: Handles the full download lifecycle of a book or a batch of books.
-- Parser: Extracts structured data from HTML or SSR content.
-- Fetcher: Sends HTTP requests and manages sessions, including login if required.
-- Exporter: Responsible for exporting downloaded data into various output formats.
+  * Downloader: Handles the full download lifecycle of a book or a batch of books.
+  * Parser: Extracts structured data from HTML or SSR content.
+  * Fetcher: Sends HTTP requests and manages sessions, including login if required.
+  * Exporter: Responsible for exporting downloaded data into various output formats.
+  * search: Provides unified search functionality across supported novel sites.
 """
 __all__ = [

novel_downloader/core/archived/deqixs/fetcher.py ADDED Viewed

@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.archived.deqixs.fetcher
+---------------------------------------------
+"""
+from typing import Any
+from novel_downloader.core.fetchers.base import BaseSession
+from novel_downloader.models import FetcherConfig
+from novel_downloader.utils import async_jitter_sleep
+# from novel_downloader.core.fetchers.registry import register_fetcher
+# @register_fetcher(
+#     site_keys=["deqixs"],
+# )
+class DeqixsSession(BaseSession):
+    """
+    A session class for interacting with the 得奇小说网 (www.deqixs.com) novel website.
+    """
+    BASE_URL = "https://www.deqixs.com"
+    BOOK_INFO_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/"
+    CHAPTER_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/{chapter_id}.html"
+    def __init__(
+        self,
+        config: FetcherConfig,
+        cookies: dict[str, str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__("deqixs", config, cookies, **kwargs)
+    async def get_book_info(
+        self,
+        book_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of the book info page asynchronously.
+        :param book_id: The book identifier.
+        :return: The page content as a string.
+        """
+        url = self.book_info_url(book_id=book_id)
+        return [await self.fetch(url, **kwargs)]
+    async def get_book_chapter(
+        self,
+        book_id: str,
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of a single chapter asynchronously.
+        :param book_id: The book identifier.
+        :param chapter_id: The chapter identifier.
+        :return: The chapter content as a string.
+        """
+        html_pages: list[str] = []
+        idx = 1
+        while True:
+            chapter_suffix = chapter_id if idx == 1 else f"{chapter_id}-{idx}"
+            relative_path = f"/xiaoshuo/{book_id}/{chapter_suffix}.html"
+            full_url = self.BASE_URL + relative_path
+            if idx > 1 and relative_path not in html_pages[-1]:
+                break
+            try:
+                html = await self.fetch(full_url, **kwargs)
+            except Exception as exc:
+                self.logger.warning(
+                    "[async] get_book_chapter(%s page %d) failed: %s",
+                    chapter_id,
+                    idx,
+                    exc,
+                )
+                break
+            html_pages.append(html)
+            idx += 1
+            await async_jitter_sleep(
+                self.request_interval,
+                mul_spread=1.1,
+                max_sleep=self.request_interval + 2,
+            )
+        return html_pages
+    @classmethod
+    def book_info_url(cls, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's info page.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified URL for the book info page.
+        """
+        return cls.BOOK_INFO_URL.format(book_id=book_id)
+    @classmethod
+    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
+        """
+        Construct the URL for fetching a specific chapter.
+        :param book_id: The identifier of the book.
+        :param chapter_id: The identifier of the chapter.
+        :return: Fully qualified chapter URL.
+        """
+        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

novel_downloader/core/archived/deqixs/parser.py ADDED Viewed

@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.archived.deqixs.parser
+--------------------------------------------
+"""
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+# from novel_downloader.core.parsers.registry import register_parser
+# @register_parser(
+#     site_keys=["deqixs"],
+# )
+class DeqixsParser(BaseParser):
+    """
+    Parser for 得奇小说网 book pages.
+    """
+    ADS: set[str] = {
+        "更新不易",
+        "记得分享",
+        "(本章完)",
+    }
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        # Extract book title and word count
+        book_name = tree.xpath("//div[@class='itemtxt']/h1/a/text()")[0].strip()
+        word_count = tree.xpath("//div[@class='itemtxt']/h1/i/text()")[0].strip()
+        # Extract serialization status and genre tags
+        spans = tree.xpath("//div[@class='itemtxt']/p[1]/span/text()")
+        serial_status = spans[0].strip() if spans else ""
+        tags = [s.strip() for s in spans[1:-1]] if len(spans) > 2 else []
+        # Extract author
+        author_text = tree.xpath("//div[@class='itemtxt']/p[2]/a/text()")[0]
+        author = author_text.replace("作者：", "").strip()
+        # Extract cover URL
+        cover_src = tree.xpath("//div[@class='item']//a/img/@src")[0]
+        cover_url = "https:" + cover_src if cover_src.startswith("//") else cover_src
+        # Extract last update time
+        update_raw = tree.xpath("//h2[@id='dir']/span/text()")[0].strip()
+        update_time = update_raw.replace("更新时间：", "").strip()
+        # Extract summary paragraphs (first description block)
+        paras = tree.xpath("(//div[@class='des bb'])[1]/p/text()")
+        summary = "\n".join(p.strip() for p in paras if p.strip())
+        # Extract chapters list
+        chapter_nodes = tree.xpath("//div[@id='list']//ul/li/a")
+        chapters: list[ChapterInfoDict] = []
+        for a in chapter_nodes:
+            href = a.get("href")
+            chapter_id = href.split("/")[-1].replace(".html", "")
+            title = a.text_content().strip()
+            chapters.append({"title": title, "url": href, "chapterId": chapter_id})
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "serial_status": serial_status,
+            "word_count": word_count,
+            "summary": summary,
+            "tags": tags,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        title_text = ""
+        contents: list[str] = []
+        for curr_html in html_list:
+            tree = html.fromstring(curr_html)
+            # Extract title once
+            if not title_text:
+                full_title = tree.xpath("string(//div[@class='submenu']/h1)")
+                if ">" in full_title:
+                    title_text = full_title.split(">", 1)[1].strip()
+                else:
+                    title_text = full_title.strip()
+            # Extract paragraphs
+            for p in tree.xpath("//div[@class='con']/p"):
+                text = p.text_content().strip()
+                # Filter out ads or empty paragraphs
+                if not text or any(ad in text for ad in self.ADS):
+                    continue
+                contents.append(text)
+        content = "\n".join(contents)
+        if not content:
+            return None
+        return {
+            "id": chapter_id,
+            "title": title_text,
+            "content": content,
+            "extra": {"site": "deqixs"},
+        }

novel_downloader/core/archived/deqixs/searcher.py ADDED Viewed

@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.archived.deqixs.searcher
+----------------------------------------------
+"""
+import logging
+from lxml import html
+from novel_downloader.core.searchers.base import BaseSearcher
+from novel_downloader.models import SearchResult
+# from novel_downloader.core.searchers.registry import register_searcher
+logger = logging.getLogger(__name__)
+# @register_searcher(
+#     site_keys=["deqixs"],
+# )
+class DeqixsSearcher(BaseSearcher):
+    site_name = "deqixs"
+    priority = 20
+    BASE_URL = "https://www.deqixs.com"
+    SEARCH_URL = "https://www.deqixs.com/tag/"
+    @classmethod
+    async def _fetch_html(cls, keyword: str) -> str:
+        params = {"key": keyword}
+        try:
+            async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
+                return await cls._response_to_str(resp)
+        except Exception:
+            logger.error(
+                "Failed to fetch HTML for keyword '%s' from '%s'",
+                keyword,
+                cls.SEARCH_URL,
+            )
+            return ""
+    @classmethod
+    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+        doc = html.fromstring(html_str)
+        rows = doc.xpath("//div[@class='container']/div[@class='item']")
+        results: list[SearchResult] = []
+        for idx, row in enumerate(rows):
+            if limit is not None and idx >= limit:
+                break
+            href = row.xpath(".//h3/a/@href")[0]
+            book_id = href.strip("/ ").split("/")[-1]
+            if not book_id:
+                continue
+            book_url = cls.BASE_URL + href
+            img_src = row.xpath(".//a/img/@src")[0]
+            cover_url = "https:" + img_src if img_src.startswith("//") else img_src
+            title = row.xpath(".//h3/a/text()")[0].strip()
+            author_text = row.xpath(".//p[2]/a/text()")[0]
+            author = author_text.replace("作者：", "").strip()
+            spans = row.xpath(".//p[1]/span/text()")
+            word_count = spans[2].strip() if len(spans) > 2 else ""
+            # Extract latest chapter and update date
+            first_li = row.xpath(".//ul/li")[0]
+            update_date = first_li.xpath("./i/text()")[0].strip()
+            latest_chapter = first_li.xpath("./a/text()")[0].strip()
+            # Compute priority
+            prio = cls.priority + idx
+            results.append(
+                SearchResult(
+                    site=cls.site_name,
+                    book_id=book_id,
+                    book_url=book_url,
+                    cover_url=cover_url,
+                    title=title,
+                    author=author,
+                    latest_chapter=latest_chapter,
+                    update_date=update_date,
+                    word_count=word_count,
+                    priority=prio,
+                )
+            )
+        return results

novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} RENAMED Viewed

@@ -1,14 +1,13 @@
 #!/usr/bin/env python3
 """
-novel_downloader.core.searchers.qidian
---------------------------------------
+novel_downloader.core.archived.qidian.searcher
+----------------------------------------------
 """
 import logging
 from lxml import html
 from novel_downloader.core.searchers.base import BaseSearcher
 from novel_downloader.models import SearchResult
@@ -28,35 +27,21 @@ class QidianSearcher(BaseSearcher):
     SEARCH_URL = "https://www.qidian.com/so/{query}.html"
     @classmethod
-    def _fetch_html(cls, keyword: str) -> str:
-        """
-        Fetch raw HTML from Qidian's search page.
-        :param keyword: The search term to query on Qidian.
-        :return: HTML text of the search results page, or an empty string on fail.
-        """
+    async def _fetch_html(cls, keyword: str) -> str:
         url = cls.SEARCH_URL.format(query=cls._quote(keyword))
         try:
-            response = cls._http_get(url)
-            return response.text
+            async with (await cls._http_get(url)) as resp:
+                return await cls._response_to_str(resp)
         except Exception:
             logger.error(
                 "Failed to fetch HTML for keyword '%s' from '%s'",
                 keyword,
                 url,
-                exc_info=True,
             )
             return ""
     @classmethod
     def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
-        """
-        Parse raw HTML from Qidian search results into list of SearchResult.
-        :param html_str: Raw HTML string from Qidian search results page.
-        :param limit: Maximum number of results to return, or None for all.
-        :return: List of SearchResult dicts.
-        """
         doc = html.fromstring(html_str)
         items = doc.xpath(
             '//div[@id="result-list"]//li[contains(@class, "res-book-item")]'
@@ -68,6 +53,8 @@ class QidianSearcher(BaseSearcher):
             if limit is not None and idx >= limit:
                 break
             book_id = item.get("data-bid")
+            if not book_id:
+                continue
             title_elem = item.xpath('.//h3[@class="book-info-title"]/a')[0]
             title = title_elem.text_content().strip()
             author_nodes = item.xpath(
@@ -79,8 +66,13 @@ class QidianSearcher(BaseSearcher):
                 SearchResult(
                     site=cls.site_name,
                     book_id=book_id,
+                    book_url="",
+                    cover_url="",
                     title=title,
                     author=author,
+                    latest_chapter="-",
+                    update_date="-",
+                    word_count="-",
                     priority=prio,
                 )
             )

novel_downloader/core/archived/wanbengo/searcher.py ADDED Viewed

@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.archived.wanbengo.searcher
+------------------------------------------------
+"""
+import logging
+from lxml import html
+from novel_downloader.core.searchers.base import BaseSearcher
+from novel_downloader.models import SearchResult
+# from novel_downloader.core.searchers.registry import register_searcher
+logger = logging.getLogger(__name__)
+# @register_searcher(
+#     site_keys=["wanbengo"],
+# )
+class WanbengoSearcher(BaseSearcher):
+    site_name = "wanbengo"
+    priority = 30
+    BASE_URL = "https://www.wanbengo.com"
+    SEARCH_URL = "https://www.sososhu.com/"
+    @classmethod
+    async def _fetch_html(cls, keyword: str) -> str:
+        params = {
+            "q": keyword,
+            "site": "wbsz",
+        }
+        try:
+            async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
+                return await cls._response_to_str(resp)
+        except Exception:
+            logger.error(
+                "Failed to fetch HTML for keyword '%s' from '%s'",
+                keyword,
+                cls.SEARCH_URL,
+            )
+            return ""
+    @classmethod
+    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+        doc = html.fromstring(html_str)
+        rows = doc.xpath(
+            "//div[contains(@class,'so_list')]//div[contains(@class,'hot')]//div[contains(@class,'item')]"
+        )
+        results: list[SearchResult] = []
+        for idx, row in enumerate(rows):
+            if limit is not None and idx >= limit:
+                break
+            a_nodes = row.xpath(".//dl/dt/a[1]")
+            a = a_nodes[0] if a_nodes else None
+            href = a.get("href") if a is not None else ""
+            if not href:
+                continue
+            book_url = cls._restore_url(cls._abs_url(href))
+            book_id = cls._book_id_from_url(book_url) if book_url else ""
+            title = (a.text_content() if a is not None else "").strip()
+            author = cls._first_str(row.xpath(".//dl/dt/span[1]/text()"))
+            cover_url = cls._first_str(
+                row.xpath(".//div[contains(@class,'image')]//img/@src")
+            )
+            # Compute priority
+            prio = cls.priority + idx
+            results.append(
+                SearchResult(
+                    site=cls.site_name,
+                    book_id=book_id,
+                    book_url=book_url,
+                    cover_url=cover_url,
+                    title=title,
+                    author=author,
+                    latest_chapter="-",
+                    update_date="-",
+                    word_count="-",
+                    priority=prio,
+                )
+            )
+        return results
+    @staticmethod
+    def _restore_url(url: str) -> str:
+        return url.replace("www.wbsz.org", "www.wanbengo.com")
+    @staticmethod
+    def _book_id_from_url(url: str) -> str:
+        tail = url.split("wanbengo.com", 1)[-1]
+        tail = tail.strip("/")
+        return tail.replace("/", "-")

novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl