PyPI - novel-downloader - Versions diffs - 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (241)

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +1 -3
novel_downloader/cli/clean.py +21 -88
novel_downloader/cli/config.py +26 -21
novel_downloader/cli/download.py +77 -64
novel_downloader/cli/export.py +16 -20
novel_downloader/cli/main.py +1 -1
novel_downloader/cli/search.py +62 -65
novel_downloader/cli/ui.py +156 -0
novel_downloader/config/__init__.py +8 -5
novel_downloader/config/adapter.py +65 -105
novel_downloader/config/{loader.py → file_io.py} +53 -26
novel_downloader/core/__init__.py +1 -0
novel_downloader/core/archived/deqixs/fetcher.py +115 -0
novel_downloader/core/archived/deqixs/parser.py +132 -0
novel_downloader/core/archived/deqixs/searcher.py +89 -0
novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
novel_downloader/core/archived/wanbengo/searcher.py +98 -0
novel_downloader/core/archived/xshbook/searcher.py +93 -0
novel_downloader/core/downloaders/__init__.py +3 -24
novel_downloader/core/downloaders/base.py +49 -23
novel_downloader/core/downloaders/common.py +191 -137
novel_downloader/core/downloaders/qianbi.py +187 -146
novel_downloader/core/downloaders/qidian.py +187 -141
novel_downloader/core/downloaders/registry.py +4 -2
novel_downloader/core/downloaders/signals.py +46 -0
novel_downloader/core/exporters/__init__.py +3 -20
novel_downloader/core/exporters/base.py +33 -37
novel_downloader/core/exporters/common/__init__.py +1 -2
novel_downloader/core/exporters/common/epub.py +15 -10
novel_downloader/core/exporters/common/main_exporter.py +19 -12
novel_downloader/core/exporters/common/txt.py +14 -9
novel_downloader/core/exporters/epub_util.py +59 -29
novel_downloader/core/exporters/linovelib/__init__.py +1 -0
novel_downloader/core/exporters/linovelib/epub.py +23 -25
novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
novel_downloader/core/exporters/linovelib/txt.py +17 -11
novel_downloader/core/exporters/qidian.py +2 -8
novel_downloader/core/exporters/registry.py +4 -2
novel_downloader/core/exporters/txt_util.py +7 -7
novel_downloader/core/fetchers/__init__.py +54 -48
novel_downloader/core/fetchers/aaatxt.py +83 -0
novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
novel_downloader/core/fetchers/dxmwx.py +110 -0
novel_downloader/core/fetchers/eightnovel.py +139 -0
novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
novel_downloader/core/fetchers/guidaye.py +85 -0
novel_downloader/core/fetchers/hetushu.py +92 -0
novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
novel_downloader/core/fetchers/ixdzs8.py +113 -0
novel_downloader/core/fetchers/jpxs123.py +101 -0
novel_downloader/core/fetchers/lewenn.py +83 -0
novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
novel_downloader/core/fetchers/piaotia.py +105 -0
novel_downloader/core/fetchers/qbtr.py +101 -0
novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
novel_downloader/core/fetchers/quanben5.py +92 -0
novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
novel_downloader/core/fetchers/registry.py +5 -16
novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
novel_downloader/core/fetchers/shencou.py +106 -0
novel_downloader/core/fetchers/shuhaige.py +84 -0
novel_downloader/core/fetchers/tongrenquan.py +84 -0
novel_downloader/core/fetchers/ttkan.py +95 -0
novel_downloader/core/fetchers/wanbengo.py +83 -0
novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
novel_downloader/core/fetchers/xiguashuwu.py +177 -0
novel_downloader/core/fetchers/xs63b.py +171 -0
novel_downloader/core/fetchers/xshbook.py +85 -0
novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
novel_downloader/core/fetchers/yibige.py +114 -0
novel_downloader/core/interfaces/__init__.py +1 -9
novel_downloader/core/interfaces/downloader.py +6 -2
novel_downloader/core/interfaces/exporter.py +7 -7
novel_downloader/core/interfaces/fetcher.py +4 -17
novel_downloader/core/interfaces/parser.py +5 -6
novel_downloader/core/interfaces/searcher.py +9 -1
novel_downloader/core/parsers/__init__.py +49 -12
novel_downloader/core/parsers/aaatxt.py +132 -0
novel_downloader/core/parsers/b520.py +116 -0
novel_downloader/core/parsers/base.py +63 -12
novel_downloader/core/parsers/biquyuedu.py +133 -0
novel_downloader/core/parsers/dxmwx.py +162 -0
novel_downloader/core/parsers/eightnovel.py +224 -0
novel_downloader/core/parsers/esjzone.py +61 -66
novel_downloader/core/parsers/guidaye.py +128 -0
novel_downloader/core/parsers/hetushu.py +139 -0
novel_downloader/core/parsers/i25zw.py +137 -0
novel_downloader/core/parsers/ixdzs8.py +186 -0
novel_downloader/core/parsers/jpxs123.py +137 -0
novel_downloader/core/parsers/lewenn.py +142 -0
novel_downloader/core/parsers/linovelib.py +48 -64
novel_downloader/core/parsers/piaotia.py +189 -0
novel_downloader/core/parsers/qbtr.py +136 -0
novel_downloader/core/parsers/qianbi.py +48 -50
novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
novel_downloader/core/parsers/qidian/main_parser.py +11 -38
novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
novel_downloader/core/parsers/quanben5.py +103 -0
novel_downloader/core/parsers/registry.py +5 -16
novel_downloader/core/parsers/sfacg.py +38 -45
novel_downloader/core/parsers/shencou.py +215 -0
novel_downloader/core/parsers/shuhaige.py +111 -0
novel_downloader/core/parsers/tongrenquan.py +116 -0
novel_downloader/core/parsers/ttkan.py +132 -0
novel_downloader/core/parsers/wanbengo.py +191 -0
novel_downloader/core/parsers/xiaoshuowu.py +173 -0
novel_downloader/core/parsers/xiguashuwu.py +435 -0
novel_downloader/core/parsers/xs63b.py +161 -0
novel_downloader/core/parsers/xshbook.py +134 -0
novel_downloader/core/parsers/yamibo.py +87 -131
novel_downloader/core/parsers/yibige.py +166 -0
novel_downloader/core/searchers/__init__.py +34 -3
novel_downloader/core/searchers/aaatxt.py +107 -0
novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
novel_downloader/core/searchers/base.py +112 -36
novel_downloader/core/searchers/dxmwx.py +105 -0
novel_downloader/core/searchers/eightnovel.py +84 -0
novel_downloader/core/searchers/esjzone.py +43 -25
novel_downloader/core/searchers/hetushu.py +92 -0
novel_downloader/core/searchers/i25zw.py +93 -0
novel_downloader/core/searchers/ixdzs8.py +107 -0
novel_downloader/core/searchers/jpxs123.py +107 -0
novel_downloader/core/searchers/piaotia.py +100 -0
novel_downloader/core/searchers/qbtr.py +106 -0
novel_downloader/core/searchers/qianbi.py +74 -40
novel_downloader/core/searchers/quanben5.py +144 -0
novel_downloader/core/searchers/registry.py +24 -8
novel_downloader/core/searchers/shuhaige.py +124 -0
novel_downloader/core/searchers/tongrenquan.py +110 -0
novel_downloader/core/searchers/ttkan.py +92 -0
novel_downloader/core/searchers/xiaoshuowu.py +122 -0
novel_downloader/core/searchers/xiguashuwu.py +95 -0
novel_downloader/core/searchers/xs63b.py +104 -0
novel_downloader/locales/en.json +31 -82
novel_downloader/locales/zh.json +32 -83
novel_downloader/models/__init__.py +21 -22
novel_downloader/models/book.py +44 -0
novel_downloader/models/config.py +4 -37
novel_downloader/models/login.py +1 -1
novel_downloader/models/search.py +5 -0
novel_downloader/resources/config/settings.toml +8 -70
novel_downloader/resources/json/xiguashuwu.json +718 -0
novel_downloader/utils/__init__.py +13 -22
novel_downloader/utils/chapter_storage.py +3 -2
novel_downloader/utils/constants.py +4 -29
novel_downloader/utils/cookies.py +6 -18
novel_downloader/utils/crypto_utils/__init__.py +13 -0
novel_downloader/utils/crypto_utils/aes_util.py +90 -0
novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
novel_downloader/utils/epub/__init__.py +1 -1
novel_downloader/utils/epub/constants.py +57 -16
novel_downloader/utils/epub/documents.py +88 -194
novel_downloader/utils/epub/models.py +0 -14
novel_downloader/utils/epub/utils.py +63 -96
novel_downloader/utils/file_utils/__init__.py +2 -23
novel_downloader/utils/file_utils/io.py +3 -113
novel_downloader/utils/file_utils/sanitize.py +0 -4
novel_downloader/utils/fontocr.py +207 -0
novel_downloader/utils/logger.py +8 -16
novel_downloader/utils/network.py +2 -2
novel_downloader/utils/state.py +4 -90
novel_downloader/utils/text_utils/__init__.py +1 -7
novel_downloader/utils/text_utils/diff_display.py +5 -7
novel_downloader/utils/time_utils/__init__.py +5 -11
novel_downloader/utils/time_utils/datetime_utils.py +20 -29
novel_downloader/utils/time_utils/sleep_utils.py +4 -8
novel_downloader/web/__init__.py +13 -0
novel_downloader/web/components/__init__.py +11 -0
novel_downloader/web/components/navigation.py +35 -0
novel_downloader/web/main.py +66 -0
novel_downloader/web/pages/__init__.py +17 -0
novel_downloader/web/pages/download.py +78 -0
novel_downloader/web/pages/progress.py +147 -0
novel_downloader/web/pages/search.py +329 -0
novel_downloader/web/services/__init__.py +17 -0
novel_downloader/web/services/client_dialog.py +164 -0
novel_downloader/web/services/cred_broker.py +113 -0
novel_downloader/web/services/cred_models.py +35 -0
novel_downloader/web/services/task_manager.py +264 -0
novel_downloader-2.0.0.dist-info/METADATA +171 -0
novel_downloader-2.0.0.dist-info/RECORD +210 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
novel_downloader/core/downloaders/biquge.py +0 -29
novel_downloader/core/downloaders/esjzone.py +0 -29
novel_downloader/core/downloaders/linovelib.py +0 -29
novel_downloader/core/downloaders/sfacg.py +0 -29
novel_downloader/core/downloaders/yamibo.py +0 -29
novel_downloader/core/exporters/biquge.py +0 -22
novel_downloader/core/exporters/esjzone.py +0 -22
novel_downloader/core/exporters/qianbi.py +0 -22
novel_downloader/core/exporters/sfacg.py +0 -22
novel_downloader/core/exporters/yamibo.py +0 -22
novel_downloader/core/fetchers/base/__init__.py +0 -14
novel_downloader/core/fetchers/base/browser.py +0 -422
novel_downloader/core/fetchers/biquge/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/browser.py +0 -209
novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
novel_downloader/core/fetchers/linovelib/browser.py +0 -198
novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/browser.py +0 -326
novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
novel_downloader/core/fetchers/sfacg/browser.py +0 -194
novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
novel_downloader/core/fetchers/yamibo/browser.py +0 -234
novel_downloader/core/parsers/biquge.py +0 -139
novel_downloader/models/chapter.py +0 -25
novel_downloader/models/types.py +0 -13
novel_downloader/tui/__init__.py +0 -7
novel_downloader/tui/app.py +0 -32
novel_downloader/tui/main.py +0 -17
novel_downloader/tui/screens/__init__.py +0 -14
novel_downloader/tui/screens/home.py +0 -198
novel_downloader/tui/screens/login.py +0 -74
novel_downloader/tui/styles/home_layout.tcss +0 -79
novel_downloader/tui/widgets/richlog_handler.py +0 -24
novel_downloader/utils/cache.py +0 -24
novel_downloader/utils/fontocr/__init__.py +0 -22
novel_downloader/utils/fontocr/hash_store.py +0 -280
novel_downloader/utils/fontocr/hash_utils.py +0 -103
novel_downloader/utils/fontocr/model_loader.py +0 -69
novel_downloader/utils/fontocr/ocr_v1.py +0 -315
novel_downloader/utils/fontocr/ocr_v2.py +0 -764
novel_downloader/utils/fontocr/ocr_v3.py +0 -744
novel_downloader-1.5.0.dist-info/METADATA +0 -196
novel_downloader-1.5.0.dist-info/RECORD +0 -164
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/parsers/qbtr.py ADDED Viewed

@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.qbtr
+----------------------------------
+"""
+import re
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["qbtr"],
+)
+class QbtrParser(BaseParser):
+    """
+    Parser for 全本同人小说 book pages.
+    """
+    BASE_URL = "https://www.qbtr.cc"
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if not html_list:
+            return None
+        # Parse the main info page
+        tree = html.fromstring(html_list[0])
+        # Book name
+        book_name = self._first_str(tree.xpath('//div[@class="infos"]/h1/text()'))
+        # Tags: the second breadcrumb (e.g., "同人小说")
+        tag = self._first_str(
+            tree.xpath('//div[contains(@class,"menNav")]/a[2]/text()')
+        )
+        tags = [tag] if tag else []
+        # Author & update_time from the date div
+        date_div = tree.xpath('//div[@class="date"]')
+        date_text = html.tostring(date_div[0], encoding="unicode", method="text")
+        author_match = re.search(r"作者[：:]\s*([^日]+)", date_text)
+        author = author_match.group(1).strip() if author_match else ""
+        date_match = re.search(r"日期[：:]\s*([\d-]+)", date_text)
+        update_time = date_match.group(1) if date_match else ""
+        # Summary from the <p> inside infos
+        paras = tree.xpath('//div[@class="infos"]/p//text()')
+        summary = "\n".join(p.strip() for p in paras if p.strip())
+        # Chapters from the book_list
+        chapters: list[ChapterInfoDict] = []
+        for a in tree.xpath('//div[contains(@class,"book_list")]//li/a'):
+            url = a.get("href", "").strip()
+            title = a.text_content().strip()
+            # General regex: /{category}/{bookId}/{chapterId}.html
+            m = re.search(r"^/[^/]+/\d+/(\d+)\.html$", url)
+            cid = m.group(1) if m else ""
+            chapters.append({"title": title, "url": url, "chapterId": cid})
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        # Parse the download page (second HTML)
+        download_url = ""
+        if len(html_list) > 1 and html_list[1]:
+            dtree = html.fromstring(html_list[1])
+            a = dtree.xpath('//a[@id="dowloadnUrl"]')
+            if a:
+                link = a[0].get("link") or a[0].get("href") or ""
+                download_url = self._fix_download_link(link)
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": "",
+            "update_time": update_time,
+            "tags": tags,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {"download_url": download_url},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        raw_title = self._first_str(
+            tree.xpath('//div[contains(@class,"read_chapterName")]//h1/text()')
+        )
+        crumbs = tree.xpath('//div[contains(@class,"readTop")]//a/text()')
+        book_name = crumbs[-1].strip() if crumbs else ""
+        title = raw_title.replace(book_name, "").strip()
+        paragraphs = tree.xpath('//div[contains(@class,"read_chapterDetail")]/p')
+        texts = []
+        for p in paragraphs:
+            txt = p.text_content().strip()
+            if txt:
+                texts.append(txt)
+        content = "\n".join(texts)
+        if not content:
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "qbtr"},
+        }
+    @classmethod
+    def _fix_download_link(cls, link: str) -> str:
+        true_link = link.replace("qb../", "/e/DownSys/")
+        return f"{cls.BASE_URL}{true_link}"

novel_downloader/core/parsers/qianbi.py CHANGED Viewed

@@ -12,63 +12,60 @@ from lxml import html
 from novel_downloader.core.parsers.base import BaseParser
 from novel_downloader.core.parsers.registry import register_parser
-from novel_downloader.models import ChapterDict
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    VolumeInfoDict,
+)
 @register_parser(
     site_keys=["qianbi"],
-    backends=["session", "browser"],
 )
 class QianbiParser(BaseParser):
-    """ """
+    """
+    Parser for 铅笔小说 book pages.
+    """
     def parse_book_info(
         self,
         html_list: list[str],
         **kwargs: Any,
-    ) -> dict[str, Any]:
-        """
-        Parse a book info page and extract metadata and chapter structure.
-        :param html_list: Raw HTML of the book info pages.
-        :return: Parsed metadata and chapter structure as a dictionary.
-        """
+    ) -> BookInfoDict | None:
         if len(html_list) < 2:
-            return {}
+            return None
         info_tree = html.fromstring(html_list[0])
         catalog_tree = html.fromstring(html_list[1])
-        result: dict[str, Any] = {}
-        title = info_tree.xpath('//h1[@class="page-title"]/text()')
-        result["book_name"] = title[0].strip() if title else ""
-        author = info_tree.xpath('//a[contains(@href,"/author/")]/@title')
-        result["author"] = author[0].strip() if author else ""
-        cover = info_tree.xpath('//div[@class="novel-cover"]//img/@data-src')
-        result["cover_url"] = cover[0].strip() if cover else ""
-        status = info_tree.xpath(
-            '//a[@class="tag-link" and (text()="完结" or text()="连载")]/text()'
+        book_name = self._first_str(info_tree.xpath('//h1[@class="page-title"]/text()'))
+        author = self._first_str(
+            info_tree.xpath('//a[contains(@href,"/author/")]/@title')
+        )
+        cover_url = self._first_str(
+            info_tree.xpath('//div[@class="novel-cover"]//img/@data-src')
+        )
+        serial_status = self._first_str(
+            info_tree.xpath(
+                '//a[@class="tag-link" and (text()="完结" or text()="连载")]/text()'
+            )
+        )
+        word_count = self._first_str(
+            info_tree.xpath('//span[contains(text(), "字")]/text()')
         )
-        result["serial_status"] = status[0] if status else ""
-        word_count_raw = info_tree.xpath('//span[contains(text(), "万字")]/text()')
-        result["word_count"] = word_count_raw[0].strip() if word_count_raw else ""
         summary_node = info_tree.xpath(
             '//div[@class="novel-info-item novel-info-content"]/span'
         )
         if summary_node and summary_node[0] is not None:
-            result["summary"] = summary_node[0].text_content().strip()
+            summary = str(summary_node[0].text_content()).strip()
         else:
-            result["summary"] = ""
+            summary = ""
-        result["update_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        volumes: list[dict[str, Any]] = []
-        current_volume = None
+        volumes: list[VolumeInfoDict] = []
+        current_volume: VolumeInfoDict | None = None
         for elem in catalog_tree.xpath('//div[@class="box"]/*'):
             class_attr = elem.get("class", "")
@@ -104,9 +101,17 @@ class QianbiParser(BaseParser):
         if current_volume:
             volumes.append(current_volume)
-        result["volumes"] = volumes
-        return result
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "word_count": word_count,
+            "serial_status": serial_status,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
     def parse_chapter(
         self,
@@ -114,31 +119,24 @@ class QianbiParser(BaseParser):
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
-        """
-        Parse a single chapter page and extract clean text or simplified HTML.
-        :param html_list: Raw HTML of the chapter page.
-        :param chapter_id: Identifier of the chapter being parsed.
-        :return: Cleaned chapter content as plain text or minimal HTML.
-        """
         if not html_list:
             return None
         tree = html.fromstring(html_list[0])
+        # Content paragraphs
         paras = tree.xpath('//div[@class="article-content"]/p/text()')
-        content_text = "\n\n".join(p.strip() for p in paras if p.strip())
+        content_text = "\n".join(p.strip() for p in paras if p.strip())
         if not content_text:
             return None
-        title = tree.xpath('//h1[@class="article-title"]/text()')
-        title_text = title[0].strip() if title else ""
+        title_text = self._first_str(tree.xpath('//h1[@class="article-title"]/text()'))
+        volume_text = self._first_str(tree.xpath('//h3[@class="text-muted"]/text()'))
-        volume = tree.xpath('//h3[@class="text-muted"]/text()')
-        volume_text = volume[0].strip() if volume else ""
-        next_href = tree.xpath('//div[@class="footer"]/a[@class="f-right"]/@href')
+        next_href = self._first_str(
+            tree.xpath('//div[@class="footer"]/a[@class="f-right"]/@href')
+        )
         next_chapter_id = (
-            next_href[0].split("/")[-1].replace(".html", "") if next_href else ""
+            next_href.split("/")[-1].replace(".html", "") if next_href else ""
         )
         return {

novel_downloader/core/parsers/qidian/book_info_parser.py CHANGED Viewed

@@ -12,10 +12,11 @@ time, status, word count, summary, and volume-chapter structure.
 import logging
 import re
 from datetime import datetime
-from typing import Any
 from lxml import html
+from novel_downloader.models import BookInfoDict, ChapterInfoDict, VolumeInfoDict
 logger = logging.getLogger(__name__)
@@ -23,7 +24,7 @@ def _chapter_url_to_id(url: str) -> str:
     return url.rstrip("/").split("/")[-1]
-def parse_book_info(html_str: str) -> dict[str, Any]:
+def parse_book_info(html_str: str) -> BookInfoDict | None:
     """
     Extract metadata: title, author, cover_url, update_time, status,
     word_count, summary, and volumes with chapters.
@@ -31,60 +32,58 @@ def parse_book_info(html_str: str) -> dict[str, Any]:
     :param html_str: Raw HTML of the book info page.
     :return: A dict containing book metadata.
     """
-    info: dict[str, Any] = {}
-    try:
-        doc = html.fromstring(html_str)
-        info["book_name"] = doc.xpath('string(//h1[@id="bookName"])').strip()
-        info["author"] = doc.xpath('string(//a[@class="writer-name"])').strip()
-        book_id = doc.xpath('//a[@id="bookImg"]/@data-bid')[0]
-        info[
-            "cover_url"
-        ] = f"https://bookcover.yuewen.com/qdbimg/349573/{book_id}/600.webp"
-        ut = (
-            doc.xpath('string(//span[@class="update-time"])')
-            .replace("更新时间:", "")
-            .strip()
-        )
-        if re.match(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$", ut):
-            info["update_time"] = ut
-        else:
-            info["update_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        info["serial_status"] = doc.xpath(
-            'string(//p[@class="book-attribute"]/span[1])'
-        ).strip()
-        tags = doc.xpath('//p[contains(@class,"all-label")]//a/text()')
-        info["tags"] = [t.strip() for t in tags if t.strip()]
-        info["word_count"] = doc.xpath('string(//p[@class="count"]/em[1])').strip()
-        summary = doc.xpath('string(//p[@class="intro"])').strip()
-        info["summary_brief"] = summary
-        raw = doc.xpath('//p[@id="book-intro-detail"]//text()')
-        info["summary"] = "\n".join(line.strip() for line in raw if line.strip())
-        volumes = []
-        for vol in doc.xpath('//div[@id="allCatalog"]//div[@class="catalog-volume"]'):
-            vol_name = vol.xpath('string(.//h3[@class="volume-name"])').strip()
-            vol_name = vol_name.split(chr(183))[0].strip()
-            chapters = []
-            for li in vol.xpath('.//ul[contains(@class,"volume-chapters")]/li'):
-                a = li.xpath('.//a[@class="chapter-name"]')[0]
-                title = a.text.strip()
-                url = a.get("href")
-                chapters.append(
-                    {"title": title, "url": url, "chapterId": _chapter_url_to_id(url)}
-                )
-            volumes.append({"volume_name": vol_name, "chapters": chapters})
-        info["volumes"] = volumes
-    except Exception as e:
-        logger.warning("[Parser] Error parsing book info: %s", e)
-    return info
+    doc = html.fromstring(html_str)
+    book_name = doc.xpath('string(//h1[@id="bookName"])').strip()
+    author = doc.xpath('string(//a[@class="writer-name"])').strip()
+    book_id = doc.xpath('//a[@id="bookImg"]/@data-bid')[0]
+    cover_url = f"https://bookcover.yuewen.com/qdbimg/349573/{book_id}/600.webp"
+    ut = doc.xpath('string(//span[@class="update-time"])')
+    ut = ut.replace("更新时间:", "").strip()
+    if re.match(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$", ut):
+        update_time = ut
+    else:
+        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    serial_status = doc.xpath('string(//p[@class="book-attribute"]/span[1])').strip()
+    tags_elem = doc.xpath('//p[contains(@class,"all-label")]//a/text()')
+    tags = [t.strip() for t in tags_elem if t.strip()]
+    word_count = doc.xpath('string(//p[@class="count"]/em[1])').strip()
+    summary_brief = doc.xpath('string(//p[@class="intro"])').strip()
+    raw = doc.xpath('//p[@id="book-intro-detail"]//text()')
+    summary = "\n".join(line.strip() for line in raw if line.strip())
+    volumes: list[VolumeInfoDict] = []
+    for vol in doc.xpath('//div[@id="allCatalog"]//div[@class="catalog-volume"]'):
+        vol_name = vol.xpath('string(.//h3[@class="volume-name"])').strip()
+        vol_name = vol_name.split(chr(183))[0].strip()
+        chapters: list[ChapterInfoDict] = []
+        for li in vol.xpath('.//ul[contains(@class,"volume-chapters")]/li'):
+            a = li.xpath('.//a[@class="chapter-name"]')[0]
+            title = a.text.strip()
+            url = a.get("href")
+            chapters.append(
+                {"title": title, "url": url, "chapterId": _chapter_url_to_id(url)}
+            )
+        volumes.append({"volume_name": vol_name, "chapters": chapters})
+    return {
+        "book_name": book_name,
+        "author": author,
+        "cover_url": cover_url,
+        "update_time": update_time,
+        "word_count": word_count,
+        "serial_status": serial_status,
+        "tags": tags,
+        "summary_brief": summary_brief,
+        "summary": summary,
+        "volumes": volumes,
+        "extra": {},
+    }

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl