PyPI - novel-downloader - Versions diffs - 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (241) hide show

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +1 -3
novel_downloader/cli/clean.py +21 -88
novel_downloader/cli/config.py +26 -21
novel_downloader/cli/download.py +77 -64
novel_downloader/cli/export.py +16 -20
novel_downloader/cli/main.py +1 -1
novel_downloader/cli/search.py +62 -65
novel_downloader/cli/ui.py +156 -0
novel_downloader/config/__init__.py +8 -5
novel_downloader/config/adapter.py +65 -105
novel_downloader/config/{loader.py → file_io.py} +53 -26
novel_downloader/core/__init__.py +1 -0
novel_downloader/core/archived/deqixs/fetcher.py +115 -0
novel_downloader/core/archived/deqixs/parser.py +132 -0
novel_downloader/core/archived/deqixs/searcher.py +89 -0
novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
novel_downloader/core/archived/wanbengo/searcher.py +98 -0
novel_downloader/core/archived/xshbook/searcher.py +93 -0
novel_downloader/core/downloaders/__init__.py +3 -24
novel_downloader/core/downloaders/base.py +49 -23
novel_downloader/core/downloaders/common.py +191 -137
novel_downloader/core/downloaders/qianbi.py +187 -146
novel_downloader/core/downloaders/qidian.py +187 -141
novel_downloader/core/downloaders/registry.py +4 -2
novel_downloader/core/downloaders/signals.py +46 -0
novel_downloader/core/exporters/__init__.py +3 -20
novel_downloader/core/exporters/base.py +33 -37
novel_downloader/core/exporters/common/__init__.py +1 -2
novel_downloader/core/exporters/common/epub.py +15 -10
novel_downloader/core/exporters/common/main_exporter.py +19 -12
novel_downloader/core/exporters/common/txt.py +14 -9
novel_downloader/core/exporters/epub_util.py +59 -29
novel_downloader/core/exporters/linovelib/__init__.py +1 -0
novel_downloader/core/exporters/linovelib/epub.py +23 -25
novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
novel_downloader/core/exporters/linovelib/txt.py +17 -11
novel_downloader/core/exporters/qidian.py +2 -8
novel_downloader/core/exporters/registry.py +4 -2
novel_downloader/core/exporters/txt_util.py +7 -7
novel_downloader/core/fetchers/__init__.py +54 -48
novel_downloader/core/fetchers/aaatxt.py +83 -0
novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
novel_downloader/core/fetchers/dxmwx.py +110 -0
novel_downloader/core/fetchers/eightnovel.py +139 -0
novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
novel_downloader/core/fetchers/guidaye.py +85 -0
novel_downloader/core/fetchers/hetushu.py +92 -0
novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
novel_downloader/core/fetchers/ixdzs8.py +113 -0
novel_downloader/core/fetchers/jpxs123.py +101 -0
novel_downloader/core/fetchers/lewenn.py +83 -0
novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
novel_downloader/core/fetchers/piaotia.py +105 -0
novel_downloader/core/fetchers/qbtr.py +101 -0
novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
novel_downloader/core/fetchers/quanben5.py +92 -0
novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
novel_downloader/core/fetchers/registry.py +5 -16
novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
novel_downloader/core/fetchers/shencou.py +106 -0
novel_downloader/core/fetchers/shuhaige.py +84 -0
novel_downloader/core/fetchers/tongrenquan.py +84 -0
novel_downloader/core/fetchers/ttkan.py +95 -0
novel_downloader/core/fetchers/wanbengo.py +83 -0
novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
novel_downloader/core/fetchers/xiguashuwu.py +177 -0
novel_downloader/core/fetchers/xs63b.py +171 -0
novel_downloader/core/fetchers/xshbook.py +85 -0
novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
novel_downloader/core/fetchers/yibige.py +114 -0
novel_downloader/core/interfaces/__init__.py +1 -9
novel_downloader/core/interfaces/downloader.py +6 -2
novel_downloader/core/interfaces/exporter.py +7 -7
novel_downloader/core/interfaces/fetcher.py +4 -17
novel_downloader/core/interfaces/parser.py +5 -6
novel_downloader/core/interfaces/searcher.py +9 -1
novel_downloader/core/parsers/__init__.py +49 -12
novel_downloader/core/parsers/aaatxt.py +132 -0
novel_downloader/core/parsers/b520.py +116 -0
novel_downloader/core/parsers/base.py +63 -12
novel_downloader/core/parsers/biquyuedu.py +133 -0
novel_downloader/core/parsers/dxmwx.py +162 -0
novel_downloader/core/parsers/eightnovel.py +224 -0
novel_downloader/core/parsers/esjzone.py +61 -66
novel_downloader/core/parsers/guidaye.py +128 -0
novel_downloader/core/parsers/hetushu.py +139 -0
novel_downloader/core/parsers/i25zw.py +137 -0
novel_downloader/core/parsers/ixdzs8.py +186 -0
novel_downloader/core/parsers/jpxs123.py +137 -0
novel_downloader/core/parsers/lewenn.py +142 -0
novel_downloader/core/parsers/linovelib.py +48 -64
novel_downloader/core/parsers/piaotia.py +189 -0
novel_downloader/core/parsers/qbtr.py +136 -0
novel_downloader/core/parsers/qianbi.py +48 -50
novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
novel_downloader/core/parsers/qidian/main_parser.py +11 -38
novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
novel_downloader/core/parsers/quanben5.py +103 -0
novel_downloader/core/parsers/registry.py +5 -16
novel_downloader/core/parsers/sfacg.py +38 -45
novel_downloader/core/parsers/shencou.py +215 -0
novel_downloader/core/parsers/shuhaige.py +111 -0
novel_downloader/core/parsers/tongrenquan.py +116 -0
novel_downloader/core/parsers/ttkan.py +132 -0
novel_downloader/core/parsers/wanbengo.py +191 -0
novel_downloader/core/parsers/xiaoshuowu.py +173 -0
novel_downloader/core/parsers/xiguashuwu.py +435 -0
novel_downloader/core/parsers/xs63b.py +161 -0
novel_downloader/core/parsers/xshbook.py +134 -0
novel_downloader/core/parsers/yamibo.py +87 -131
novel_downloader/core/parsers/yibige.py +166 -0
novel_downloader/core/searchers/__init__.py +34 -3
novel_downloader/core/searchers/aaatxt.py +107 -0
novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
novel_downloader/core/searchers/base.py +112 -36
novel_downloader/core/searchers/dxmwx.py +105 -0
novel_downloader/core/searchers/eightnovel.py +84 -0
novel_downloader/core/searchers/esjzone.py +43 -25
novel_downloader/core/searchers/hetushu.py +92 -0
novel_downloader/core/searchers/i25zw.py +93 -0
novel_downloader/core/searchers/ixdzs8.py +107 -0
novel_downloader/core/searchers/jpxs123.py +107 -0
novel_downloader/core/searchers/piaotia.py +100 -0
novel_downloader/core/searchers/qbtr.py +106 -0
novel_downloader/core/searchers/qianbi.py +74 -40
novel_downloader/core/searchers/quanben5.py +144 -0
novel_downloader/core/searchers/registry.py +24 -8
novel_downloader/core/searchers/shuhaige.py +124 -0
novel_downloader/core/searchers/tongrenquan.py +110 -0
novel_downloader/core/searchers/ttkan.py +92 -0
novel_downloader/core/searchers/xiaoshuowu.py +122 -0
novel_downloader/core/searchers/xiguashuwu.py +95 -0
novel_downloader/core/searchers/xs63b.py +104 -0
novel_downloader/locales/en.json +31 -82
novel_downloader/locales/zh.json +32 -83
novel_downloader/models/__init__.py +21 -22
novel_downloader/models/book.py +44 -0
novel_downloader/models/config.py +4 -37
novel_downloader/models/login.py +1 -1
novel_downloader/models/search.py +5 -0
novel_downloader/resources/config/settings.toml +8 -70
novel_downloader/resources/json/xiguashuwu.json +718 -0
novel_downloader/utils/__init__.py +13 -22
novel_downloader/utils/chapter_storage.py +3 -2
novel_downloader/utils/constants.py +4 -29
novel_downloader/utils/cookies.py +6 -18
novel_downloader/utils/crypto_utils/__init__.py +13 -0
novel_downloader/utils/crypto_utils/aes_util.py +90 -0
novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
novel_downloader/utils/epub/__init__.py +1 -1
novel_downloader/utils/epub/constants.py +57 -16
novel_downloader/utils/epub/documents.py +88 -194
novel_downloader/utils/epub/models.py +0 -14
novel_downloader/utils/epub/utils.py +63 -96
novel_downloader/utils/file_utils/__init__.py +2 -23
novel_downloader/utils/file_utils/io.py +3 -113
novel_downloader/utils/file_utils/sanitize.py +0 -4
novel_downloader/utils/fontocr.py +207 -0
novel_downloader/utils/logger.py +8 -16
novel_downloader/utils/network.py +2 -2
novel_downloader/utils/state.py +4 -90
novel_downloader/utils/text_utils/__init__.py +1 -7
novel_downloader/utils/text_utils/diff_display.py +5 -7
novel_downloader/utils/time_utils/__init__.py +5 -11
novel_downloader/utils/time_utils/datetime_utils.py +20 -29
novel_downloader/utils/time_utils/sleep_utils.py +4 -8
novel_downloader/web/__init__.py +13 -0
novel_downloader/web/components/__init__.py +11 -0
novel_downloader/web/components/navigation.py +35 -0
novel_downloader/web/main.py +66 -0
novel_downloader/web/pages/__init__.py +17 -0
novel_downloader/web/pages/download.py +78 -0
novel_downloader/web/pages/progress.py +147 -0
novel_downloader/web/pages/search.py +329 -0
novel_downloader/web/services/__init__.py +17 -0
novel_downloader/web/services/client_dialog.py +164 -0
novel_downloader/web/services/cred_broker.py +113 -0
novel_downloader/web/services/cred_models.py +35 -0
novel_downloader/web/services/task_manager.py +264 -0
novel_downloader-2.0.0.dist-info/METADATA +171 -0
novel_downloader-2.0.0.dist-info/RECORD +210 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
novel_downloader/core/downloaders/biquge.py +0 -29
novel_downloader/core/downloaders/esjzone.py +0 -29
novel_downloader/core/downloaders/linovelib.py +0 -29
novel_downloader/core/downloaders/sfacg.py +0 -29
novel_downloader/core/downloaders/yamibo.py +0 -29
novel_downloader/core/exporters/biquge.py +0 -22
novel_downloader/core/exporters/esjzone.py +0 -22
novel_downloader/core/exporters/qianbi.py +0 -22
novel_downloader/core/exporters/sfacg.py +0 -22
novel_downloader/core/exporters/yamibo.py +0 -22
novel_downloader/core/fetchers/base/__init__.py +0 -14
novel_downloader/core/fetchers/base/browser.py +0 -422
novel_downloader/core/fetchers/biquge/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/browser.py +0 -209
novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
novel_downloader/core/fetchers/linovelib/browser.py +0 -198
novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/browser.py +0 -326
novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
novel_downloader/core/fetchers/sfacg/browser.py +0 -194
novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
novel_downloader/core/fetchers/yamibo/browser.py +0 -234
novel_downloader/core/parsers/biquge.py +0 -139
novel_downloader/models/chapter.py +0 -25
novel_downloader/models/types.py +0 -13
novel_downloader/tui/__init__.py +0 -7
novel_downloader/tui/app.py +0 -32
novel_downloader/tui/main.py +0 -17
novel_downloader/tui/screens/__init__.py +0 -14
novel_downloader/tui/screens/home.py +0 -198
novel_downloader/tui/screens/login.py +0 -74
novel_downloader/tui/styles/home_layout.tcss +0 -79
novel_downloader/tui/widgets/richlog_handler.py +0 -24
novel_downloader/utils/cache.py +0 -24
novel_downloader/utils/fontocr/__init__.py +0 -22
novel_downloader/utils/fontocr/hash_store.py +0 -280
novel_downloader/utils/fontocr/hash_utils.py +0 -103
novel_downloader/utils/fontocr/model_loader.py +0 -69
novel_downloader/utils/fontocr/ocr_v1.py +0 -315
novel_downloader/utils/fontocr/ocr_v2.py +0 -764
novel_downloader/utils/fontocr/ocr_v3.py +0 -744
novel_downloader-1.5.0.dist-info/METADATA +0 -196
novel_downloader-1.5.0.dist-info/RECORD +0 -164
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/parsers/guidaye.py ADDED Viewed

@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.guidaye
+-------------------------------------
+"""
+import re
+from datetime import datetime
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["guidaye"],
+)
+class GuidayeParser(BaseParser):
+    """
+    Parser for 名著阅读 book pages.
+    """
+    BASE_URL = "https://b.guidaye.com"
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        # Book metadata
+        book_name = self._first_str(tree.xpath('//h1[@class="page-title"]/a/text()'))
+        author = self._first_str(
+            tree.xpath('//div[@id="category-description-author"]/a/text()')
+        )
+        cover_url = self.BASE_URL + self._first_str(
+            tree.xpath('//div[@id="category-description-image"]//img/@src')
+        )
+        # Summary paragraphs
+        summary = (
+            tree.xpath('string(//div[@id="category-description-text"])')
+            .replace("内容简介：", "", 1)
+            .strip()
+        )
+        # Chapter volumes & listings
+        volumes: list[VolumeInfoDict] = []
+        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
+        items = tree.xpath('//div[@class="entry-content"]/ul/*')
+        for elem in items:
+            if elem.tag.lower() == "h3":
+                # Flush previous volume
+                if curr_vol["chapters"]:
+                    volumes.append(curr_vol)
+                curr_vol = {"volume_name": elem.text_content().strip(), "chapters": []}
+            elif elem.tag.lower() == "li":
+                link = elem.xpath(".//a")[0]
+                href = link.get("href", "").strip()
+                title = link.get("title", "").strip()
+                cid_match = re.search(r"/(\d+)\.html$", href)
+                chapter_id = cid_match.group(1) if cid_match else ""
+                curr_vol["chapters"].append(
+                    {"title": title, "url": href, "chapterId": chapter_id}
+                )
+        # Append last volume
+        if curr_vol["chapters"]:
+            volumes.append(curr_vol)
+        # Timestamp of parsing
+        share_text = tree.xpath('string(//div[@id="category-description-share"])')
+        m = re.search(r"最近更新[：:]\s*([\d-]+)", share_text)
+        update_time = m.group(1) if m else datetime.now().strftime("%Y-%m-%d")
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        # Title from entry-title
+        title = self._first_str(tree.xpath('//h1[@class="entry-title"]/text()'))
+        # Extract paragraphs within entry-content
+        full_text = tree.xpath('string(//div[@class="entry-content"])')
+        full_text = full_text.replace("\u00A0", " ")
+        # 3. Split into lines and clean up
+        lines = [line.strip() for line in full_text.splitlines() if line.strip()]
+        if not lines:
+            return None
+        content = "\n".join(lines)
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "guidaye"},
+        }

novel_downloader/core/parsers/hetushu.py ADDED Viewed

@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.hetushu
+-------------------------------------
+"""
+import re
+from datetime import datetime
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["hetushu"],
+)
+class HetushuParser(BaseParser):
+    """
+    Parser for 和图书 book pages.
+    """
+    BASE_URL = "https://www.hetushu.com"
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        # --- Metadata ---
+        book_name = self._first_str(
+            tree.xpath('//div[contains(@class,"book_info")]/h2/text()')
+        )
+        author = self._first_str(
+            tree.xpath(
+                '//div[contains(@class,"book_info")]/div[contains(.,"作者")]/a/text()'
+            )
+        )
+        cover_url = self.BASE_URL + self._first_str(
+            tree.xpath('//div[contains(@class,"book_info")]//img/@src')
+        )
+        cls_attr = self._first_str(
+            tree.xpath('//div[contains(@class,"book_info")]/@class')
+        )
+        serial_status = "已完结" if "finish" in cls_attr else "连载中"
+        tags = [
+            a.strip()
+            for a in tree.xpath('//dl[@class="tag"]//dd/a/text()')
+            if a.strip()
+        ]
+        paras = tree.xpath('//div[@class="intro"]/p/text()')
+        summary = "\n".join(p.strip() for p in paras if p.strip())
+        # --- Chapter volumes & listings ---
+        volumes: list[VolumeInfoDict] = []
+        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
+        for elem in tree.xpath('//dl[@id="dir"]/*'):
+            if elem.tag == "dt":
+                # Start a new volume
+                if curr_vol["chapters"]:
+                    volumes.append(curr_vol)
+                curr_vol = {
+                    "volume_name": elem.text_content().strip(),
+                    "chapters": [],
+                }
+            elif elem.tag == "dd":
+                link = elem.xpath(".//a")[0]
+                href = link.get("href", "").strip()
+                title = link.get("title", "").strip()
+                # Extract numeric chapterId from the URL
+                m = re.search(r"/book/\d+/(?P<id>\d+)\.html", href)
+                chapter_id = m.group("id") if m else ""
+                curr_vol["chapters"].append(
+                    {"title": title, "url": href, "chapterId": chapter_id}
+                )
+        # Append the last volume if it has any chapters
+        if curr_vol["chapters"]:
+            volumes.append(curr_vol)
+        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "serial_status": serial_status,
+            "tags": tags,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(
+            tree.xpath('//div[@id="content"]//h2[@class="h2"]/text()')
+        )
+        paras = tree.xpath('//div[@id="content"]/div[not(@class)]/text()')
+        paragraph_texts = [p.strip() for p in paras if p.strip()]
+        content = "\n".join(paragraph_texts)
+        if not content.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "hetushu"},
+        }

novel_downloader/core/parsers/i25zw.py ADDED Viewed

@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.i25zw
+-----------------------------------
+"""
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["i25zw"],
+)
+class I25zwParser(BaseParser):
+    """
+    Parser for 25中文网 book-info pages.
+    """
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if len(html_list) < 2:
+            return None
+        info_tree = html.fromstring(html_list[0])
+        catalog_tree = html.fromstring(html_list[1])
+        # Metadata extraction
+        book_name = self._first_str(info_tree.xpath("//h1[@class='f21h']/text()"))
+        author = self._first_str(info_tree.xpath("//h1[@class='f21h']/em/a/text()"))
+        cover_url = self._first_str(info_tree.xpath("//div[@class='pic']/img/@src"))
+        # Tags, status, word count, update time
+        tag = self._first_str(
+            info_tree.xpath("//b[contains(text(),'小说分类')]/parent::td/text()")
+        )
+        serial_status = self._first_str(
+            info_tree.xpath("//b[contains(text(),'小说状态')]/parent::td/text()")
+        )
+        word_count = self._first_str(
+            info_tree.xpath("//b[contains(text(),'全文字数')]/parent::td/text()")
+        )
+        raw_update = self._first_str(
+            info_tree.xpath("//b[contains(text(),'更新时间')]/parent::td/text()")
+        )
+        update_time = raw_update.strip("()")
+        # Summary from styled intro div
+        full_intro = info_tree.xpath("string(//div[@class='intro'][@style])").strip()
+        summary = full_intro.replace(f"关于{book_name}：", "", 1).strip()
+        # Chapter list extraction
+        dl = catalog_tree.xpath("//div[@id='list']/dl")[0]
+        # Full-text section dd's
+        dds = dl.xpath("./dd[preceding-sibling::dt[1][contains(., '正文')]]/a")
+        if not dds:
+            # Fallback to second <dt>'s following <dd>
+            dds = dl.xpath("./dt[2]/following-sibling::dd/a")
+        chapters: list[ChapterInfoDict] = []
+        for a in dds:
+            url = a.get("href", "").strip()
+            title = a.text_content().strip()
+            # '/311006/252845677.html' -> '252845677'
+            chapter_id = url.split("/")[-1].split(".")[0]
+            chapters.append(
+                {
+                    "title": title,
+                    "url": url,
+                    "chapterId": chapter_id,
+                }
+            )
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "word_count": word_count,
+            "serial_status": serial_status,
+            "tags": [tag] if tag else [],
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        title_text = self._first_str(
+            tree.xpath("//div[@class='zhangjieming']/h1/text()")
+        )
+        content_divs = tree.xpath("//div[@id='content']")
+        if not content_divs:
+            return None
+        content_div = content_divs[0]
+        # Only select direct <p> children to avoid nav links
+        paragraphs = []
+        for p in content_div.xpath("./p"):
+            text = p.text_content().strip()
+            if text:
+                paragraphs.append(text)
+        content_text = "\n".join(paragraphs)
+        if not content_text.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title_text,
+            "content": content_text,
+            "extra": {"site": "i25zw"},
+        }

novel_downloader/core/parsers/ixdzs8.py ADDED Viewed

@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.ixdzs8
+------------------------------------
+"""
+import contextlib
+import json
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["ixdzs8"],
+)
+class Ixdzs8Parser(BaseParser):
+    """
+    Parser for 爱下电子书 book pages.
+    """
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if len(html_list) < 2 or not html_list[0] or not html_list[1]:
+            return None
+        # Parse HTML
+        tree = html.fromstring(html_list[0])
+        book_name = self._meta(tree, "og:novel:book_name") or self._first_str(
+            tree.xpath("//div[@class='n-text']/h1/text()")
+        )
+        author = self._meta(tree, "og:novel:author") or self._first_str(
+            tree.xpath("//div[@class='n-text']//a[contains(@class,'bauthor')]/text()")
+        )
+        cover_url = self._meta(tree, "og:image")
+        if not cover_url:
+            cover_url = self._first_str(tree.xpath("//div[@class='n-img']//img/@src"))
+        serial_status = self._meta(tree, "og:novel:status")
+        # 2022-08-25T18:08:03+08:00 -> 2022-08-25 18:08:03
+        iso_time = self._meta(tree, "og:novel:update_time")
+        update_time = ""
+        if iso_time:
+            update_time = iso_time.replace("T", " ").split("+", 1)[0].strip()
+        word_count = self._first_str(
+            tree.xpath("//div[@class='n-text']//span[contains(@class,'nsize')]/text()")
+        )
+        raw_summary = self._meta(tree, "og:description")
+        summary = ""
+        if raw_summary:
+            s = raw_summary.replace("&nbsp;", "")
+            s = s.replace("<br />", "\n")
+            summary = "\n".join(
+                self._norm_space(line) for line in s.splitlines()
+            ).strip()
+        tags = [
+            self._norm_space(t)
+            for t in tree.xpath("//div[contains(@class,'tags')]//em/a/text()")
+            if t and t.strip()
+        ]
+        category = self._meta(tree, "og:novel:category") or self._first_str(
+            tree.xpath("//div[@class='n-text']/p[a[contains(@class,'nsort')]]/a/text()")
+        )
+        if category:
+            tags.append(category)
+        book_path = self._meta(tree, "og:novel:read_url") or self._meta(tree, "og:url")
+        book_id = ""
+        if book_path:
+            book_id = book_path.strip("/").split("/")[-1]
+        data = {}
+        with contextlib.suppress(Exception):
+            data = json.loads(html_list[1])
+        clist = data.get("data", []) if isinstance(data, dict) else []
+        chapters: list[ChapterInfoDict] = []
+        for chap in clist:
+            ordernum = str(chap.get("ordernum", "")).strip()
+            if not ordernum:
+                continue
+            title = self._norm_space(chap.get("title", "") or "") or "未命名章节"
+            url = f"/read/{book_id}/p{ordernum}.html" if book_id else ""
+            chapters.append(
+                {
+                    "url": url,
+                    "title": title,
+                    "chapterId": f"p{ordernum}",
+                }
+            )
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "serial_status": serial_status,
+            "update_time": update_time,
+            "word_count": word_count,
+            "summary": summary,
+            "tags": tags,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(tree.xpath("//div[@class='page-d-top']/h1/text()"))
+        if not title:
+            title = self._first_str(
+                tree.xpath("//article[contains(@class,'page-content')]//h3/text()")
+            )
+        title = self._norm_space(title)
+        # paragraphs within the reading section; skip ad containers
+        ps = tree.xpath(
+            "//article[contains(@class,'page-content')]//section//p[not(contains(@class,'abg'))]"
+        )
+        paragraphs: list[str] = []
+        for p in ps:
+            raw = p.text_content()
+            txt = self._norm_space(raw)
+            if not txt or self._is_ad_line(txt):
+                continue
+            paragraphs.append(txt)
+        if not paragraphs:
+            return None
+        # Replace FIRST line with .replace(title, "")
+        first = paragraphs[0].replace(title, "")
+        first = first.replace(title.replace(" ", ""), "").strip()
+        if first:
+            paragraphs[0] = first
+        else:
+            paragraphs.pop(0)
+        if paragraphs:
+            last = paragraphs[-1]
+            if "本章完" in last:
+                paragraphs.pop()
+        content = "\n".join(paragraphs)
+        if not content.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "ixdzs8"},
+        }
+    @classmethod
+    def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
+        return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl