PyPI - novel-downloader - Versions diffs - 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

novel-downloader 1.4.5py3-none-any.whl → 2.0.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (276) hide show

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +2 -4
novel_downloader/cli/clean.py +21 -88
novel_downloader/cli/config.py +27 -104
novel_downloader/cli/download.py +78 -66
novel_downloader/cli/export.py +20 -21
novel_downloader/cli/main.py +3 -1
novel_downloader/cli/search.py +120 -0
novel_downloader/cli/ui.py +156 -0
novel_downloader/config/__init__.py +10 -14
novel_downloader/config/adapter.py +195 -99
novel_downloader/config/{loader.py → file_io.py} +53 -27
novel_downloader/core/__init__.py +14 -13
novel_downloader/core/archived/deqixs/fetcher.py +115 -0
novel_downloader/core/archived/deqixs/parser.py +132 -0
novel_downloader/core/archived/deqixs/searcher.py +89 -0
novel_downloader/core/archived/qidian/searcher.py +79 -0
novel_downloader/core/archived/wanbengo/searcher.py +98 -0
novel_downloader/core/archived/xshbook/searcher.py +93 -0
novel_downloader/core/downloaders/__init__.py +8 -30
novel_downloader/core/downloaders/base.py +182 -30
novel_downloader/core/downloaders/common.py +217 -384
novel_downloader/core/downloaders/qianbi.py +332 -4
novel_downloader/core/downloaders/qidian.py +250 -290
novel_downloader/core/downloaders/registry.py +69 -0
novel_downloader/core/downloaders/signals.py +46 -0
novel_downloader/core/exporters/__init__.py +8 -26
novel_downloader/core/exporters/base.py +107 -31
novel_downloader/core/exporters/common/__init__.py +3 -4
novel_downloader/core/exporters/common/epub.py +92 -171
novel_downloader/core/exporters/common/main_exporter.py +14 -67
novel_downloader/core/exporters/common/txt.py +90 -86
novel_downloader/core/exporters/epub_util.py +184 -1327
novel_downloader/core/exporters/linovelib/__init__.py +3 -2
novel_downloader/core/exporters/linovelib/epub.py +165 -222
novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
novel_downloader/core/exporters/linovelib/txt.py +76 -66
novel_downloader/core/exporters/qidian.py +15 -11
novel_downloader/core/exporters/registry.py +55 -0
novel_downloader/core/exporters/txt_util.py +67 -0
novel_downloader/core/fetchers/__init__.py +57 -56
novel_downloader/core/fetchers/aaatxt.py +83 -0
novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
novel_downloader/core/fetchers/biquyuedu.py +83 -0
novel_downloader/core/fetchers/dxmwx.py +110 -0
novel_downloader/core/fetchers/eightnovel.py +139 -0
novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
novel_downloader/core/fetchers/guidaye.py +85 -0
novel_downloader/core/fetchers/hetushu.py +92 -0
novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
novel_downloader/core/fetchers/ixdzs8.py +113 -0
novel_downloader/core/fetchers/jpxs123.py +101 -0
novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
novel_downloader/core/fetchers/piaotia.py +105 -0
novel_downloader/core/fetchers/qbtr.py +101 -0
novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
novel_downloader/core/fetchers/quanben5.py +92 -0
novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
novel_downloader/core/fetchers/registry.py +60 -0
novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
novel_downloader/core/fetchers/shencou.py +106 -0
novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
novel_downloader/core/fetchers/tongrenquan.py +84 -0
novel_downloader/core/fetchers/ttkan.py +95 -0
novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
novel_downloader/core/fetchers/xiguashuwu.py +177 -0
novel_downloader/core/fetchers/xs63b.py +171 -0
novel_downloader/core/fetchers/xshbook.py +85 -0
novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
novel_downloader/core/fetchers/yibige.py +114 -0
novel_downloader/core/interfaces/__init__.py +8 -14
novel_downloader/core/interfaces/downloader.py +6 -2
novel_downloader/core/interfaces/exporter.py +7 -7
novel_downloader/core/interfaces/fetcher.py +4 -17
novel_downloader/core/interfaces/parser.py +5 -6
novel_downloader/core/interfaces/searcher.py +26 -0
novel_downloader/core/parsers/__init__.py +58 -22
novel_downloader/core/parsers/aaatxt.py +132 -0
novel_downloader/core/parsers/b520.py +116 -0
novel_downloader/core/parsers/base.py +63 -12
novel_downloader/core/parsers/biquyuedu.py +133 -0
novel_downloader/core/parsers/dxmwx.py +162 -0
novel_downloader/core/parsers/eightnovel.py +224 -0
novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
novel_downloader/core/parsers/guidaye.py +128 -0
novel_downloader/core/parsers/hetushu.py +139 -0
novel_downloader/core/parsers/i25zw.py +137 -0
novel_downloader/core/parsers/ixdzs8.py +186 -0
novel_downloader/core/parsers/jpxs123.py +137 -0
novel_downloader/core/parsers/lewenn.py +142 -0
novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
novel_downloader/core/parsers/piaotia.py +189 -0
novel_downloader/core/parsers/qbtr.py +136 -0
novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
novel_downloader/core/parsers/qidian/__init__.py +2 -2
novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
novel_downloader/core/parsers/qidian/main_parser.py +19 -57
novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
novel_downloader/core/parsers/quanben5.py +103 -0
novel_downloader/core/parsers/registry.py +57 -0
novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
novel_downloader/core/parsers/shencou.py +215 -0
novel_downloader/core/parsers/shuhaige.py +111 -0
novel_downloader/core/parsers/tongrenquan.py +116 -0
novel_downloader/core/parsers/ttkan.py +132 -0
novel_downloader/core/parsers/wanbengo.py +191 -0
novel_downloader/core/parsers/xiaoshuowu.py +173 -0
novel_downloader/core/parsers/xiguashuwu.py +435 -0
novel_downloader/core/parsers/xs63b.py +161 -0
novel_downloader/core/parsers/xshbook.py +134 -0
novel_downloader/core/parsers/yamibo.py +155 -0
novel_downloader/core/parsers/yibige.py +166 -0
novel_downloader/core/searchers/__init__.py +51 -0
novel_downloader/core/searchers/aaatxt.py +107 -0
novel_downloader/core/searchers/b520.py +84 -0
novel_downloader/core/searchers/base.py +168 -0
novel_downloader/core/searchers/dxmwx.py +105 -0
novel_downloader/core/searchers/eightnovel.py +84 -0
novel_downloader/core/searchers/esjzone.py +102 -0
novel_downloader/core/searchers/hetushu.py +92 -0
novel_downloader/core/searchers/i25zw.py +93 -0
novel_downloader/core/searchers/ixdzs8.py +107 -0
novel_downloader/core/searchers/jpxs123.py +107 -0
novel_downloader/core/searchers/piaotia.py +100 -0
novel_downloader/core/searchers/qbtr.py +106 -0
novel_downloader/core/searchers/qianbi.py +165 -0
novel_downloader/core/searchers/quanben5.py +144 -0
novel_downloader/core/searchers/registry.py +79 -0
novel_downloader/core/searchers/shuhaige.py +124 -0
novel_downloader/core/searchers/tongrenquan.py +110 -0
novel_downloader/core/searchers/ttkan.py +92 -0
novel_downloader/core/searchers/xiaoshuowu.py +122 -0
novel_downloader/core/searchers/xiguashuwu.py +95 -0
novel_downloader/core/searchers/xs63b.py +104 -0
novel_downloader/locales/en.json +36 -79
novel_downloader/locales/zh.json +37 -80
novel_downloader/models/__init__.py +23 -50
novel_downloader/models/book.py +44 -0
novel_downloader/models/config.py +16 -43
novel_downloader/models/login.py +1 -1
novel_downloader/models/search.py +21 -0
novel_downloader/resources/config/settings.toml +39 -74
novel_downloader/resources/css_styles/intro.css +83 -0
novel_downloader/resources/css_styles/main.css +30 -89
novel_downloader/resources/json/xiguashuwu.json +718 -0
novel_downloader/utils/__init__.py +43 -0
novel_downloader/utils/chapter_storage.py +247 -226
novel_downloader/utils/constants.py +5 -50
novel_downloader/utils/cookies.py +6 -18
novel_downloader/utils/crypto_utils/__init__.py +13 -0
novel_downloader/utils/crypto_utils/aes_util.py +90 -0
novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
novel_downloader/utils/epub/__init__.py +34 -0
novel_downloader/utils/epub/builder.py +377 -0
novel_downloader/utils/epub/constants.py +118 -0
novel_downloader/utils/epub/documents.py +297 -0
novel_downloader/utils/epub/models.py +120 -0
novel_downloader/utils/epub/utils.py +179 -0
novel_downloader/utils/file_utils/__init__.py +5 -30
novel_downloader/utils/file_utils/io.py +9 -150
novel_downloader/utils/file_utils/normalize.py +2 -2
novel_downloader/utils/file_utils/sanitize.py +2 -7
novel_downloader/utils/fontocr.py +207 -0
novel_downloader/utils/i18n.py +2 -0
novel_downloader/utils/logger.py +10 -16
novel_downloader/utils/network.py +111 -252
novel_downloader/utils/state.py +5 -90
novel_downloader/utils/text_utils/__init__.py +16 -21
novel_downloader/utils/text_utils/diff_display.py +6 -9
novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
novel_downloader/utils/text_utils/text_cleaner.py +179 -0
novel_downloader/utils/text_utils/truncate_utils.py +62 -0
novel_downloader/utils/time_utils/__init__.py +6 -12
novel_downloader/utils/time_utils/datetime_utils.py +23 -33
novel_downloader/utils/time_utils/sleep_utils.py +5 -10
novel_downloader/web/__init__.py +13 -0
novel_downloader/web/components/__init__.py +11 -0
novel_downloader/web/components/navigation.py +35 -0
novel_downloader/web/main.py +66 -0
novel_downloader/web/pages/__init__.py +17 -0
novel_downloader/web/pages/download.py +78 -0
novel_downloader/web/pages/progress.py +147 -0
novel_downloader/web/pages/search.py +329 -0
novel_downloader/web/services/__init__.py +17 -0
novel_downloader/web/services/client_dialog.py +164 -0
novel_downloader/web/services/cred_broker.py +113 -0
novel_downloader/web/services/cred_models.py +35 -0
novel_downloader/web/services/task_manager.py +264 -0
novel_downloader-2.0.0.dist-info/METADATA +171 -0
novel_downloader-2.0.0.dist-info/RECORD +210 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
novel_downloader/config/site_rules.py +0 -94
novel_downloader/core/downloaders/biquge.py +0 -25
novel_downloader/core/downloaders/esjzone.py +0 -25
novel_downloader/core/downloaders/linovelib.py +0 -25
novel_downloader/core/downloaders/sfacg.py +0 -25
novel_downloader/core/downloaders/yamibo.py +0 -25
novel_downloader/core/exporters/biquge.py +0 -25
novel_downloader/core/exporters/esjzone.py +0 -25
novel_downloader/core/exporters/qianbi.py +0 -25
novel_downloader/core/exporters/sfacg.py +0 -25
novel_downloader/core/exporters/yamibo.py +0 -25
novel_downloader/core/factory/__init__.py +0 -20
novel_downloader/core/factory/downloader.py +0 -73
novel_downloader/core/factory/exporter.py +0 -58
novel_downloader/core/factory/fetcher.py +0 -96
novel_downloader/core/factory/parser.py +0 -86
novel_downloader/core/fetchers/base/__init__.py +0 -14
novel_downloader/core/fetchers/base/browser.py +0 -403
novel_downloader/core/fetchers/biquge/__init__.py +0 -14
novel_downloader/core/fetchers/common/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/browser.py +0 -204
novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
novel_downloader/core/fetchers/linovelib/browser.py +0 -193
novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/browser.py +0 -318
novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
novel_downloader/core/fetchers/sfacg/browser.py +0 -189
novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
novel_downloader/core/fetchers/yamibo/browser.py +0 -229
novel_downloader/core/parsers/biquge/__init__.py +0 -10
novel_downloader/core/parsers/biquge/main_parser.py +0 -134
novel_downloader/core/parsers/common/__init__.py +0 -13
novel_downloader/core/parsers/common/helper.py +0 -323
novel_downloader/core/parsers/common/main_parser.py +0 -106
novel_downloader/core/parsers/esjzone/__init__.py +0 -10
novel_downloader/core/parsers/linovelib/__init__.py +0 -10
novel_downloader/core/parsers/qianbi/__init__.py +0 -10
novel_downloader/core/parsers/sfacg/__init__.py +0 -10
novel_downloader/core/parsers/yamibo/__init__.py +0 -10
novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
novel_downloader/models/browser.py +0 -21
novel_downloader/models/chapter.py +0 -25
novel_downloader/models/site_rules.py +0 -99
novel_downloader/models/tasks.py +0 -33
novel_downloader/models/types.py +0 -15
novel_downloader/resources/css_styles/volume-intro.css +0 -56
novel_downloader/resources/json/replace_word_map.json +0 -4
novel_downloader/resources/text/blacklist.txt +0 -22
novel_downloader/tui/__init__.py +0 -7
novel_downloader/tui/app.py +0 -32
novel_downloader/tui/main.py +0 -17
novel_downloader/tui/screens/__init__.py +0 -14
novel_downloader/tui/screens/home.py +0 -198
novel_downloader/tui/screens/login.py +0 -74
novel_downloader/tui/styles/home_layout.tcss +0 -79
novel_downloader/tui/widgets/richlog_handler.py +0 -24
novel_downloader/utils/cache.py +0 -24
novel_downloader/utils/fontocr/__init__.py +0 -22
novel_downloader/utils/fontocr/model_loader.py +0 -69
novel_downloader/utils/fontocr/ocr_v1.py +0 -303
novel_downloader/utils/fontocr/ocr_v2.py +0 -752
novel_downloader/utils/hash_store.py +0 -279
novel_downloader/utils/hash_utils.py +0 -103
novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
novel_downloader/utils/text_utils/font_mapping.py +0 -28
novel_downloader/utils/text_utils/text_cleaning.py +0 -107
novel_downloader-1.4.5.dist-info/METADATA +0 -196
novel_downloader-1.4.5.dist-info/RECORD +0 -165
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/fetchers/xiguashuwu.py ADDED Viewed

@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.fetchers.xiguashuwu
+-----------------------------------------
+"""
+from typing import Any
+from novel_downloader.core.fetchers.base import BaseSession
+from novel_downloader.core.fetchers.registry import register_fetcher
+from novel_downloader.models import FetcherConfig
+from novel_downloader.utils import async_jitter_sleep
+@register_fetcher(
+    site_keys=["xiguashuwu"],
+)
+class XiguashuwuSession(BaseSession):
+    """
+    A session class for interacting with the 西瓜书屋 (www.xiguashuwu.com) novel.
+    """
+    BASE_URL = "https://www.xiguashuwu.com"
+    BOOK_INFO_URL = "https://www.xiguashuwu.com/book/{book_id}/iszip/0/"
+    BOOK_CATALOG_URL = "https://www.xiguashuwu.com/book/{book_id}/catalog/"
+    CHAPTER_URL = "https://www.xiguashuwu.com/book/{book_id}/{chapter_id}.html"
+    def __init__(
+        self,
+        config: FetcherConfig,
+        cookies: dict[str, str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__("xiguashuwu", config, cookies, **kwargs)
+    async def get_book_info(
+        self,
+        book_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of the book info page asynchronously.
+        Order: [info, catalogs1, ..., catalogsN]
+        :param book_id: The book identifier.
+        :return: The page content as string list.
+        """
+        info_url = self.book_info_url(book_id=book_id)
+        info_html = await self.fetch(info_url, **kwargs)
+        catalog_url = self.book_catalog_url(book_id=book_id)
+        catalog_pages: list[str] = []
+        idx = 1
+        while True:
+            suffix = "" if idx == 1 else f"{idx}.html"
+            full_url = catalog_url + suffix
+            try:
+                html = await self.fetch(full_url, **kwargs)
+            except Exception as exc:
+                self.logger.warning(
+                    "[async] get_book_catalog(%s page %d) failed: %s",
+                    book_id,
+                    idx,
+                    exc,
+                )
+                break
+            catalog_pages.append(html)
+            idx += 1
+            next_patterns = [
+                # f"javascript:readbook('{book_id}','{idx}');",
+                # f"javascript:gobook('{book_id}','{idx}');",
+                # f"javascript:runbook('{book_id}','{idx}');",
+                # f"javascript:gotochapter('{book_id}','{idx}');",
+                f"javascript:readbookjump('{book_id}','{idx}');",
+                f"javascript:gobookjump('{book_id}','{idx}');",
+                f"javascript:runbookjump('{book_id}','{idx}');",
+                f"javascript:gotojump('{book_id}','{idx}');",
+                f"javascript:gotochapterjump('{book_id}','{idx}');",
+                f"/book/{book_id}/catalog/{idx}.html",
+            ]
+            if not any(pat in html for pat in next_patterns):
+                break
+            await async_jitter_sleep(
+                self.request_interval,
+                mul_spread=1.1,
+                max_sleep=self.request_interval + 2,
+            )
+        return [info_html, *catalog_pages]
+    async def get_book_chapter(
+        self,
+        book_id: str,
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of a single chapter asynchronously.
+        Order: [page1, ..., pageN]
+        :param book_id: The book identifier.
+        :param chapter_id: The chapter identifier.
+        :return: The page content as string list.
+        """
+        html_pages: list[str] = []
+        idx = 1
+        while True:
+            chapter_suffix = chapter_id if idx == 1 else f"{chapter_id}_{idx}"
+            relative_path = self.relative_chapter_url(book_id, chapter_suffix)
+            if idx > 1 and relative_path not in html_pages[-1]:
+                break
+            full_url = self.BASE_URL + relative_path
+            try:
+                html = await self.fetch(full_url, **kwargs)
+            except Exception as exc:
+                self.logger.warning(
+                    "[async] get_book_chapter(%s page %d) failed: %s",
+                    chapter_id,
+                    idx,
+                    exc,
+                )
+                break
+            html_pages.append(html)
+            idx += 1
+            await async_jitter_sleep(
+                self.request_interval,
+                mul_spread=1.1,
+                max_sleep=self.request_interval + 2,
+            )
+        return html_pages
+    @classmethod
+    def book_info_url(cls, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's info page.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified URL for the book info page.
+        """
+        return cls.BOOK_INFO_URL.format(book_id=book_id)
+    @classmethod
+    def book_catalog_url(cls, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's catalog page.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified catalog page URL.
+        """
+        return cls.BOOK_CATALOG_URL.format(book_id=book_id)
+    @classmethod
+    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
+        """
+        Construct the URL for fetching a specific chapter.
+        :param book_id: The identifier of the book.
+        :param chapter_id: The identifier of the chapter.
+        :return: Fully qualified chapter URL.
+        """
+        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
+    @classmethod
+    def relative_chapter_url(cls, book_id: str, chapter_id: str) -> str:
+        """
+        Return the relative URL path for a given chapter.
+        """
+        return f"/book/{book_id}/{chapter_id}.html"

novel_downloader/core/fetchers/xs63b.py ADDED Viewed

@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.fetchers.xs63b
+------------------------------------
+"""
+import asyncio
+import base64
+import re
+from typing import Any
+from novel_downloader.core.fetchers.base import BaseSession
+from novel_downloader.core.fetchers.registry import register_fetcher
+from novel_downloader.models import FetcherConfig
+from novel_downloader.utils import async_jitter_sleep
+@register_fetcher(
+    site_keys=["xs63b"],
+)
+class Xs63bSession(BaseSession):
+    """
+    A session class for interacting with the 小说路上 (m.xs63b.com) novel website.
+    """
+    BOOK_INFO_URL = "https://m.xs63b.com/{book_id}/"
+    BOOK_CATALOG_URL = "https://www.xs63b.com/{book_id}/"
+    CHAPTER_URL = "https://m.xs63b.com/{book_id}/{chapter_id}.html"
+    _JSARR_PATTERN = re.compile(r"var\s+jsarr\s*=\s*\[([^\]]+)\]")
+    _JSSTR_PATTERN = re.compile(r"var\s+jsstr\s*=\s*\"([^\"]+)\";")
+    def __init__(
+        self,
+        config: FetcherConfig,
+        cookies: dict[str, str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__("xs63b", config, cookies, **kwargs)
+    async def get_book_info(
+        self,
+        book_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of the book info page asynchronously.
+        Order: [info, catalog]
+        :param book_id: The book identifier.
+        :return: The page content as string list.
+        """
+        book_id = book_id.replace("-", "/")
+        info_url = self.book_info_url(book_id=book_id)
+        catalog_url = self.book_catalog_url(book_id=book_id)
+        info_html, catalog_html = await asyncio.gather(
+            self.fetch(info_url, ssl=False, **kwargs),
+            self.fetch(catalog_url, ssl=False, **kwargs),
+        )
+        return [info_html, catalog_html]
+    async def get_book_chapter(
+        self,
+        book_id: str,
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of a single chapter asynchronously.
+        Order: [page1, ..., pageN]
+        :param book_id: The book identifier.
+        :param chapter_id: The chapter identifier.
+        :return: The page content as string list.
+        """
+        book_id = book_id.replace("-", "/")
+        html_pages: list[str] = []
+        chapter_url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
+        while True:
+            try:
+                html = await self.fetch(chapter_url, **kwargs)
+            except Exception as exc:
+                self.logger.warning(
+                    "[async] get_book_chapter(%s page %d) failed: %s",
+                    chapter_url,
+                    exc,
+                )
+                break
+            html_pages.append(html)
+            if "/xs635/mobile/images/nextpage.png" not in html:
+                break
+            jsarr = self._parse_jsarr(html)
+            jsstr = self._parse_jsstr(html)
+            chapter_url = self._build_chapter_url(book_id, jsarr, jsstr)
+            await async_jitter_sleep(
+                self.request_interval,
+                mul_spread=1.1,
+                max_sleep=self.request_interval + 2,
+            )
+        return html_pages
+    @classmethod
+    def book_info_url(cls, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's info page.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified URL for the book info page.
+        """
+        return cls.BOOK_INFO_URL.format(book_id=book_id)
+    @classmethod
+    def book_catalog_url(cls, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's catalog page.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified catalog page URL.
+        """
+        return cls.BOOK_CATALOG_URL.format(book_id=book_id)
+    @classmethod
+    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
+        """
+        Construct the URL for fetching a specific chapter.
+        :param book_id: The identifier of the book.
+        :param chapter_id: The identifier of the chapter.
+        :return: Fully qualified chapter URL.
+        """
+        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
+    @classmethod
+    def _parse_jsarr(cls, text: str) -> list[int]:
+        """
+        Extract jsarr from `var jsarr = [...];`.
+        Raises ValueError if not found.
+        """
+        m = cls._JSARR_PATTERN.search(text)
+        if not m:
+            raise ValueError("jsarr not found")
+        return [int(x) for x in m.group(1).split(",")]
+    @classmethod
+    def _parse_jsstr(cls, text: str) -> str:
+        """
+        Extract jsstr from `var jsstr = "...";`.
+        Raises ValueError if not found.
+        """
+        m = cls._JSSTR_PATTERN.search(text)
+        if not m:
+            raise ValueError("jsstr not found")
+        return m.group(1)
+    @staticmethod
+    def _build_chapter_url(book_id: str, jsarr: list[int], jsstr: str) -> str:
+        decoded = base64.b64decode(jsstr).decode("utf-8")
+        nnarr = list(decoded)
+        nnstr = "".join(nnarr[i] for i in jsarr)
+        return f"https://m.xs63b.com/{book_id}/{nnstr}.html"

novel_downloader/core/fetchers/xshbook.py ADDED Viewed

@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.fetchers.xshbook
+--------------------------------------
+"""
+from typing import Any
+from novel_downloader.core.fetchers.base import BaseSession
+from novel_downloader.core.fetchers.registry import register_fetcher
+from novel_downloader.models import FetcherConfig
+@register_fetcher(
+    site_keys=["xshbook"],
+)
+class XshbookSession(BaseSession):
+    """
+    A session class for interacting with the 小说虎 (www.xshbook.com) novel website.
+    """
+    BOOK_INFO_URL = "https://www.xshbook.com/{book_id}/"
+    CHAPTER_URL = "https://www.xshbook.com/{book_id}/{chapter_id}.html"
+    def __init__(
+        self,
+        config: FetcherConfig,
+        cookies: dict[str, str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__("xshbook", config, cookies, **kwargs)
+    async def get_book_info(
+        self,
+        book_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of the book info page asynchronously.
+        :param book_id: The book identifier.
+        :return: The page content as string list.
+        """
+        book_id = book_id.replace("-", "/")
+        url = self.book_info_url(book_id=book_id)
+        return [await self.fetch(url, **kwargs)]
+    async def get_book_chapter(
+        self,
+        book_id: str,
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of a single chapter asynchronously.
+        :param book_id: The book identifier.
+        :param chapter_id: The chapter identifier.
+        :return: The page content as string list.
+        """
+        book_id = book_id.replace("-", "/")
+        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
+        return [await self.fetch(url, **kwargs)]
+    @classmethod
+    def book_info_url(cls, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's info page.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified URL for the book info page.
+        """
+        return cls.BOOK_INFO_URL.format(book_id=book_id)
+    @classmethod
+    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
+        """
+        Construct the URL for fetching a specific chapter.
+        :param book_id: The identifier of the book.
+        :param chapter_id: The identifier of the chapter.
+        :return: Fully qualified chapter URL.
+        """
+        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} RENAMED Viewed

@@ -1,22 +1,27 @@
 #!/usr/bin/env python3
 """
-novel_downloader.core.fetchers.yamibo.session
----------------------------------------------
+novel_downloader.core.fetchers.yamibo
+-------------------------------------
 """
+from collections.abc import Mapping
 from typing import Any
 from lxml import html
 from novel_downloader.core.fetchers.base import BaseSession
+from novel_downloader.core.fetchers.registry import register_fetcher
 from novel_downloader.models import FetcherConfig, LoginField
-from novel_downloader.utils.time_utils import async_sleep_with_random_delay
+from novel_downloader.utils import async_jitter_sleep
+@register_fetcher(
+    site_keys=["yamibo"],
+)
 class YamiboSession(BaseSession):
     """
-    A session class for interacting with the Yamibo (www.yamibo.com) novel website.
+    A session class for interacting with the 百合会 (www.yamibo.com) novel website.
     """
     BASE_URL = "https://www.yamibo.com"
@@ -64,7 +69,7 @@ class YamiboSession(BaseSession):
             ):
                 self._is_logged_in = True
                 return True
-            await async_sleep_with_random_delay(
+            await async_jitter_sleep(
                 self.backoff_factor,
                 mul_spread=1.1,
                 max_sleep=self.backoff_factor + 2,
@@ -82,7 +87,7 @@ class YamiboSession(BaseSession):
         Fetch the raw HTML of the book info page asynchronously.
         :param book_id: The book identifier.
-        :return: The page content as a string.
+        :return: The page content as string list.
         """
         url = self.book_info_url(book_id=book_id)
         return [await self.fetch(url, **kwargs)]
@@ -98,7 +103,7 @@ class YamiboSession(BaseSession):
         :param book_id: The book identifier.
         :param chapter_id: The chapter identifier.
-        :return: The chapter content as a string.
+        :return: The page content as string list.
         """
         url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
         return [await self.fetch(url, **kwargs)]
@@ -166,10 +171,6 @@ class YamiboSession(BaseSession):
         """
         return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
-    @property
-    def hostname(self) -> str:
-        return "www.yamibo.com"
     async def _api_login(self, username: str, password: str) -> bool:
         """
         Login to the API using a 2-step token-based process.
@@ -227,3 +228,14 @@ class YamiboSession(BaseSession):
         if not resp_text:
             return False
         return not any(kw in resp_text[0] for kw in keywords)
+    @staticmethod
+    def _filter_cookies(
+        raw_cookies: list[Mapping[str, Any]],
+    ) -> dict[str, str]:
+        ALLOWED_DOMAINS = {"www.yamibo.com", "bbs.yamibo.com", ""}
+        return {
+            c["name"]: c["value"]
+            for c in raw_cookies
+            if c.get("domain", "") in ALLOWED_DOMAINS
+        }

novel_downloader/core/fetchers/yibige.py ADDED Viewed

@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.fetchers.yibige
+-------------------------------------
+"""
+import asyncio
+from typing import Any
+from novel_downloader.core.fetchers.base import BaseSession
+from novel_downloader.core.fetchers.registry import register_fetcher
+from novel_downloader.models import FetcherConfig
+@register_fetcher(
+    site_keys=["yibige"],
+)
+class YibigeSession(BaseSession):
+    """
+    A session class for interacting with the 一笔阁 (Yibige, www.yibige.org)
+    novel website.
+
+    The host used for every request is chosen once in ``__init__`` from
+    ``config.locale_style`` and stored in ``self.base_url``.
+    """
+    # URL templates; ``{base_url}`` is a bare host name (no scheme).
+    BOOK_INFO_URL = "https://{base_url}/{book_id}/"
+    BOOK_CATALOG_URL = "https://{base_url}/{book_id}/index.html"
+    CHAPTER_URL = "https://{base_url}/{book_id}/{chapter_id}.html"
+    def __init__(
+        self,
+        config: FetcherConfig,
+        cookies: dict[str, str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Initialize the session and select the host to talk to.
+
+        :param config: Fetcher configuration; its ``locale_style`` selects
+                       the host.
+        :param cookies: Optional cookies passed through to the base session.
+        :param kwargs: Extra keyword arguments passed through to the base
+                       session.
+        """
+        super().__init__("yibige", config, cookies, **kwargs)
+        # "simplified" uses the main site; any other value uses the Taiwan host.
+        self.base_url = (
+            "www.yibige.org" if config.locale_style == "simplified" else "tw.yibige.org"
+        )
+        # Main site: www.yibige.org
+        # Singapore: sg.yibige.org
+        # Taiwan (Traditional Chinese): tw.yibige.org
+        # Hong Kong (Traditional Chinese): hk.yibige.org
+    async def get_book_info(
+        self,
+        book_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of the book info and catalog pages asynchronously.
+
+        Both pages are requested concurrently via ``asyncio.gather``.
+
+        Order: [info, catalog]
+
+        :param book_id: The book identifier.
+        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
+        :return: Two-element list: info page HTML, then catalog page HTML.
+        """
+        info_url = self.book_info_url(base_url=self.base_url, book_id=book_id)
+        catalog_url = self.book_catalog_url(base_url=self.base_url, book_id=book_id)
+        info_html, catalog_html = await asyncio.gather(
+            self.fetch(info_url, **kwargs),
+            self.fetch(catalog_url, **kwargs),
+        )
+        return [info_html, catalog_html]
+    async def get_book_chapter(
+        self,
+        book_id: str,
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> list[str]:
+        """
+        Fetch the raw HTML of a single chapter asynchronously.
+
+        :param book_id: The book identifier.
+        :param chapter_id: The chapter identifier.
+        :param kwargs: Extra keyword arguments forwarded to ``self.fetch``.
+        :return: Single-element list holding the chapter page HTML.
+        """
+        url = self.chapter_url(
+            base_url=self.base_url, book_id=book_id, chapter_id=chapter_id
+        )
+        return [await self.fetch(url, **kwargs)]
+    @classmethod
+    def book_info_url(cls, base_url: str, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's info page.
+
+        :param base_url: Host name substituted into the URL template.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified URL for the book info page.
+        """
+        return cls.BOOK_INFO_URL.format(base_url=base_url, book_id=book_id)
+    @classmethod
+    def book_catalog_url(cls, base_url: str, book_id: str) -> str:
+        """
+        Construct the URL for fetching a book's catalog page.
+
+        :param base_url: Host name substituted into the URL template.
+        :param book_id: The identifier of the book.
+        :return: Fully qualified catalog page URL.
+        """
+        return cls.BOOK_CATALOG_URL.format(base_url=base_url, book_id=book_id)
+    @classmethod
+    def chapter_url(cls, base_url: str, book_id: str, chapter_id: str) -> str:
+        """
+        Construct the URL for fetching a specific chapter.
+
+        :param base_url: Host name substituted into the URL template.
+        :param book_id: The identifier of the book.
+        :param chapter_id: The identifier of the chapter.
+        :return: Fully qualified chapter URL.
+        """
+        return cls.CHAPTER_URL.format(
+            base_url=base_url, book_id=book_id, chapter_id=chapter_id
+        )

novel_downloader/core/interfaces/__init__.py CHANGED Viewed

@@ -3,25 +3,19 @@
 novel_downloader.core.interfaces
 --------------------------------
-This package centralizes the protocol definitions used across the
-system to promote interface-based design and type-safe dependency
-injection.
-Included protocols:
-- DownloaderProtocol
-- FetcherProtocol
-- ParserProtocol
-- ExporterProtocol
+Protocol interfaces defining the contracts for core components.
 """
-from .downloader import DownloaderProtocol
-from .exporter import ExporterProtocol
-from .fetcher import FetcherProtocol
-from .parser import ParserProtocol
 __all__ = [
     "DownloaderProtocol",
     "ExporterProtocol",
     "FetcherProtocol",
     "ParserProtocol",
+    "SearcherProtocol",
 ]
+from .downloader import DownloaderProtocol
+from .exporter import ExporterProtocol
+from .fetcher import FetcherProtocol
+from .parser import ParserProtocol
+from .searcher import SearcherProtocol

novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl