PyPI - novel-downloader - Versions diffs - 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (241)

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +1 -3
novel_downloader/cli/clean.py +21 -88
novel_downloader/cli/config.py +26 -21
novel_downloader/cli/download.py +77 -64
novel_downloader/cli/export.py +16 -20
novel_downloader/cli/main.py +1 -1
novel_downloader/cli/search.py +62 -65
novel_downloader/cli/ui.py +156 -0
novel_downloader/config/__init__.py +8 -5
novel_downloader/config/adapter.py +65 -105
novel_downloader/config/{loader.py → file_io.py} +53 -26
novel_downloader/core/__init__.py +1 -0
novel_downloader/core/archived/deqixs/fetcher.py +115 -0
novel_downloader/core/archived/deqixs/parser.py +132 -0
novel_downloader/core/archived/deqixs/searcher.py +89 -0
novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
novel_downloader/core/archived/wanbengo/searcher.py +98 -0
novel_downloader/core/archived/xshbook/searcher.py +93 -0
novel_downloader/core/downloaders/__init__.py +3 -24
novel_downloader/core/downloaders/base.py +49 -23
novel_downloader/core/downloaders/common.py +191 -137
novel_downloader/core/downloaders/qianbi.py +187 -146
novel_downloader/core/downloaders/qidian.py +187 -141
novel_downloader/core/downloaders/registry.py +4 -2
novel_downloader/core/downloaders/signals.py +46 -0
novel_downloader/core/exporters/__init__.py +3 -20
novel_downloader/core/exporters/base.py +33 -37
novel_downloader/core/exporters/common/__init__.py +1 -2
novel_downloader/core/exporters/common/epub.py +15 -10
novel_downloader/core/exporters/common/main_exporter.py +19 -12
novel_downloader/core/exporters/common/txt.py +14 -9
novel_downloader/core/exporters/epub_util.py +59 -29
novel_downloader/core/exporters/linovelib/__init__.py +1 -0
novel_downloader/core/exporters/linovelib/epub.py +23 -25
novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
novel_downloader/core/exporters/linovelib/txt.py +17 -11
novel_downloader/core/exporters/qidian.py +2 -8
novel_downloader/core/exporters/registry.py +4 -2
novel_downloader/core/exporters/txt_util.py +7 -7
novel_downloader/core/fetchers/__init__.py +54 -48
novel_downloader/core/fetchers/aaatxt.py +83 -0
novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
novel_downloader/core/fetchers/dxmwx.py +110 -0
novel_downloader/core/fetchers/eightnovel.py +139 -0
novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
novel_downloader/core/fetchers/guidaye.py +85 -0
novel_downloader/core/fetchers/hetushu.py +92 -0
novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
novel_downloader/core/fetchers/ixdzs8.py +113 -0
novel_downloader/core/fetchers/jpxs123.py +101 -0
novel_downloader/core/fetchers/lewenn.py +83 -0
novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
novel_downloader/core/fetchers/piaotia.py +105 -0
novel_downloader/core/fetchers/qbtr.py +101 -0
novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
novel_downloader/core/fetchers/quanben5.py +92 -0
novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
novel_downloader/core/fetchers/registry.py +5 -16
novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
novel_downloader/core/fetchers/shencou.py +106 -0
novel_downloader/core/fetchers/shuhaige.py +84 -0
novel_downloader/core/fetchers/tongrenquan.py +84 -0
novel_downloader/core/fetchers/ttkan.py +95 -0
novel_downloader/core/fetchers/wanbengo.py +83 -0
novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
novel_downloader/core/fetchers/xiguashuwu.py +177 -0
novel_downloader/core/fetchers/xs63b.py +171 -0
novel_downloader/core/fetchers/xshbook.py +85 -0
novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
novel_downloader/core/fetchers/yibige.py +114 -0
novel_downloader/core/interfaces/__init__.py +1 -9
novel_downloader/core/interfaces/downloader.py +6 -2
novel_downloader/core/interfaces/exporter.py +7 -7
novel_downloader/core/interfaces/fetcher.py +4 -17
novel_downloader/core/interfaces/parser.py +5 -6
novel_downloader/core/interfaces/searcher.py +9 -1
novel_downloader/core/parsers/__init__.py +49 -12
novel_downloader/core/parsers/aaatxt.py +132 -0
novel_downloader/core/parsers/b520.py +116 -0
novel_downloader/core/parsers/base.py +63 -12
novel_downloader/core/parsers/biquyuedu.py +133 -0
novel_downloader/core/parsers/dxmwx.py +162 -0
novel_downloader/core/parsers/eightnovel.py +224 -0
novel_downloader/core/parsers/esjzone.py +61 -66
novel_downloader/core/parsers/guidaye.py +128 -0
novel_downloader/core/parsers/hetushu.py +139 -0
novel_downloader/core/parsers/i25zw.py +137 -0
novel_downloader/core/parsers/ixdzs8.py +186 -0
novel_downloader/core/parsers/jpxs123.py +137 -0
novel_downloader/core/parsers/lewenn.py +142 -0
novel_downloader/core/parsers/linovelib.py +48 -64
novel_downloader/core/parsers/piaotia.py +189 -0
novel_downloader/core/parsers/qbtr.py +136 -0
novel_downloader/core/parsers/qianbi.py +48 -50
novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
novel_downloader/core/parsers/qidian/main_parser.py +11 -38
novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
novel_downloader/core/parsers/quanben5.py +103 -0
novel_downloader/core/parsers/registry.py +5 -16
novel_downloader/core/parsers/sfacg.py +38 -45
novel_downloader/core/parsers/shencou.py +215 -0
novel_downloader/core/parsers/shuhaige.py +111 -0
novel_downloader/core/parsers/tongrenquan.py +116 -0
novel_downloader/core/parsers/ttkan.py +132 -0
novel_downloader/core/parsers/wanbengo.py +191 -0
novel_downloader/core/parsers/xiaoshuowu.py +173 -0
novel_downloader/core/parsers/xiguashuwu.py +435 -0
novel_downloader/core/parsers/xs63b.py +161 -0
novel_downloader/core/parsers/xshbook.py +134 -0
novel_downloader/core/parsers/yamibo.py +87 -131
novel_downloader/core/parsers/yibige.py +166 -0
novel_downloader/core/searchers/__init__.py +34 -3
novel_downloader/core/searchers/aaatxt.py +107 -0
novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
novel_downloader/core/searchers/base.py +112 -36
novel_downloader/core/searchers/dxmwx.py +105 -0
novel_downloader/core/searchers/eightnovel.py +84 -0
novel_downloader/core/searchers/esjzone.py +43 -25
novel_downloader/core/searchers/hetushu.py +92 -0
novel_downloader/core/searchers/i25zw.py +93 -0
novel_downloader/core/searchers/ixdzs8.py +107 -0
novel_downloader/core/searchers/jpxs123.py +107 -0
novel_downloader/core/searchers/piaotia.py +100 -0
novel_downloader/core/searchers/qbtr.py +106 -0
novel_downloader/core/searchers/qianbi.py +74 -40
novel_downloader/core/searchers/quanben5.py +144 -0
novel_downloader/core/searchers/registry.py +24 -8
novel_downloader/core/searchers/shuhaige.py +124 -0
novel_downloader/core/searchers/tongrenquan.py +110 -0
novel_downloader/core/searchers/ttkan.py +92 -0
novel_downloader/core/searchers/xiaoshuowu.py +122 -0
novel_downloader/core/searchers/xiguashuwu.py +95 -0
novel_downloader/core/searchers/xs63b.py +104 -0
novel_downloader/locales/en.json +31 -82
novel_downloader/locales/zh.json +32 -83
novel_downloader/models/__init__.py +21 -22
novel_downloader/models/book.py +44 -0
novel_downloader/models/config.py +4 -37
novel_downloader/models/login.py +1 -1
novel_downloader/models/search.py +5 -0
novel_downloader/resources/config/settings.toml +8 -70
novel_downloader/resources/json/xiguashuwu.json +718 -0
novel_downloader/utils/__init__.py +13 -22
novel_downloader/utils/chapter_storage.py +3 -2
novel_downloader/utils/constants.py +4 -29
novel_downloader/utils/cookies.py +6 -18
novel_downloader/utils/crypto_utils/__init__.py +13 -0
novel_downloader/utils/crypto_utils/aes_util.py +90 -0
novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
novel_downloader/utils/epub/__init__.py +1 -1
novel_downloader/utils/epub/constants.py +57 -16
novel_downloader/utils/epub/documents.py +88 -194
novel_downloader/utils/epub/models.py +0 -14
novel_downloader/utils/epub/utils.py +63 -96
novel_downloader/utils/file_utils/__init__.py +2 -23
novel_downloader/utils/file_utils/io.py +3 -113
novel_downloader/utils/file_utils/sanitize.py +0 -4
novel_downloader/utils/fontocr.py +207 -0
novel_downloader/utils/logger.py +8 -16
novel_downloader/utils/network.py +2 -2
novel_downloader/utils/state.py +4 -90
novel_downloader/utils/text_utils/__init__.py +1 -7
novel_downloader/utils/text_utils/diff_display.py +5 -7
novel_downloader/utils/time_utils/__init__.py +5 -11
novel_downloader/utils/time_utils/datetime_utils.py +20 -29
novel_downloader/utils/time_utils/sleep_utils.py +4 -8
novel_downloader/web/__init__.py +13 -0
novel_downloader/web/components/__init__.py +11 -0
novel_downloader/web/components/navigation.py +35 -0
novel_downloader/web/main.py +66 -0
novel_downloader/web/pages/__init__.py +17 -0
novel_downloader/web/pages/download.py +78 -0
novel_downloader/web/pages/progress.py +147 -0
novel_downloader/web/pages/search.py +329 -0
novel_downloader/web/services/__init__.py +17 -0
novel_downloader/web/services/client_dialog.py +164 -0
novel_downloader/web/services/cred_broker.py +113 -0
novel_downloader/web/services/cred_models.py +35 -0
novel_downloader/web/services/task_manager.py +264 -0
novel_downloader-2.0.0.dist-info/METADATA +171 -0
novel_downloader-2.0.0.dist-info/RECORD +210 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
novel_downloader/core/downloaders/biquge.py +0 -29
novel_downloader/core/downloaders/esjzone.py +0 -29
novel_downloader/core/downloaders/linovelib.py +0 -29
novel_downloader/core/downloaders/sfacg.py +0 -29
novel_downloader/core/downloaders/yamibo.py +0 -29
novel_downloader/core/exporters/biquge.py +0 -22
novel_downloader/core/exporters/esjzone.py +0 -22
novel_downloader/core/exporters/qianbi.py +0 -22
novel_downloader/core/exporters/sfacg.py +0 -22
novel_downloader/core/exporters/yamibo.py +0 -22
novel_downloader/core/fetchers/base/__init__.py +0 -14
novel_downloader/core/fetchers/base/browser.py +0 -422
novel_downloader/core/fetchers/biquge/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/browser.py +0 -209
novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
novel_downloader/core/fetchers/linovelib/browser.py +0 -198
novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/browser.py +0 -326
novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
novel_downloader/core/fetchers/sfacg/browser.py +0 -194
novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
novel_downloader/core/fetchers/yamibo/browser.py +0 -234
novel_downloader/core/parsers/biquge.py +0 -139
novel_downloader/models/chapter.py +0 -25
novel_downloader/models/types.py +0 -13
novel_downloader/tui/__init__.py +0 -7
novel_downloader/tui/app.py +0 -32
novel_downloader/tui/main.py +0 -17
novel_downloader/tui/screens/__init__.py +0 -14
novel_downloader/tui/screens/home.py +0 -198
novel_downloader/tui/screens/login.py +0 -74
novel_downloader/tui/styles/home_layout.tcss +0 -79
novel_downloader/tui/widgets/richlog_handler.py +0 -24
novel_downloader/utils/cache.py +0 -24
novel_downloader/utils/fontocr/__init__.py +0 -22
novel_downloader/utils/fontocr/hash_store.py +0 -280
novel_downloader/utils/fontocr/hash_utils.py +0 -103
novel_downloader/utils/fontocr/model_loader.py +0 -69
novel_downloader/utils/fontocr/ocr_v1.py +0 -315
novel_downloader/utils/fontocr/ocr_v2.py +0 -764
novel_downloader/utils/fontocr/ocr_v3.py +0 -744
novel_downloader-1.5.0.dist-info/METADATA +0 -196
novel_downloader-1.5.0.dist-info/RECORD +0 -164
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/parsers/xshbook.py ADDED Viewed

@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.xshbook
+-------------------------------------
+"""
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["xshbook"],
+)
+class XshbookParser(BaseParser):
+    """Parser for 小说虎 book pages."""
+    BASE = "http://www.xshbook.com"
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        book_name = self._first_str(tree.xpath("//div[@id='info']/h1/text()"))
+        author = self._first_str(
+            tree.xpath("//div[@id='info']/p[1]/text()"),
+            replaces=[("\xa0", ""), ("作者:", "")],
+        )
+        update_time = self._first_str(
+            tree.xpath("//meta[@property='og:novel:update_time']/@content")
+        )
+        summary = "\n".join(
+            self._first_str(p.xpath("string()").splitlines())
+            for p in tree.xpath("//div[@id='intro']//p")
+        ).strip()
+        summary = summary.split("本站提示", 1)[0].strip()
+        cover_url = self._first_str(tree.xpath("//div[@id='fmimg']//img/@src"))
+        book_type = self._first_str(tree.xpath("//div[@class='con_top']/a[2]/text()"))
+        tags: list[str] = [book_type] if book_type else []
+        chapters: list[ChapterInfoDict] = []
+        for a in tree.xpath("//div[@id='list']//dd/a"):
+            href = a.get("href", "")
+            title = self._norm_space(a.text_content())
+            # /95071/95071941/389027455.html -> "389027455"
+            chapter_id = href.rsplit("/", 1)[-1].split(".", 1)[0]
+            chapters.append({"title": title, "url": href, "chapterId": chapter_id})
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "summary": summary,
+            "tags": tags,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
+        if not title:
+            title = self._first_str(
+                tree.xpath("//div[@class='con_top']/text()[last()]")
+            )
+        cont_nodes = tree.xpath("//div[@id='content']")
+        if not cont_nodes:
+            return None
+        cont = cont_nodes[0]
+        # remove scripts under content
+        for s in cont.xpath(".//script"):
+            s.getparent().remove(s)
+        paragraphs: list[str] = []
+        for p in cont.xpath(".//p"):
+            text = html.tostring(p, method="text", encoding="unicode")
+            text = text.replace("\xa0", " ")
+            # filter boilerplate lines
+            bad = (
+                "谨记我们的网址" in text
+                or "温馨提示" in text
+                or "提示" in text
+                and "本文" not in text
+                and len(text) < 60
+                or "分享" in text
+                and len(text) < 40
+            )
+            if not bad:
+                paragraphs.append(text)
+        content = "\n".join(self._norm_space(p) for p in paragraphs if p.strip())
+        if not content.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "xshbook"},
+        }

novel_downloader/core/parsers/yamibo.py CHANGED Viewed

@@ -11,148 +11,104 @@ from lxml import html
 from novel_downloader.core.parsers.base import BaseParser
 from novel_downloader.core.parsers.registry import register_parser
-from novel_downloader.models import ChapterDict
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
 @register_parser(
     site_keys=["yamibo"],
-    backends=["session", "browser"],
 )
 class YamiboParser(BaseParser):
-    """ """
+    """
+    Parser for 百合会 book pages.
+    """
     BASE_URL = "https://www.yamibo.com"
-    # Book info XPaths
-    _BOOK_NAME_XPATH = 'string(//h3[contains(@class, "col-md-12")])'
-    _AUTHOR_XPATH = 'string(//h5[contains(@class, "text-warning")])'
-    _COVER_URL_XPATH = '//img[contains(@class, "img-responsive")]/@src'
-    _UPDATE_TIME_XPATH = '//p[contains(text(), "更新时间：")]'
-    _SERIAL_STATUS_XPATH = '//p[contains(text(), "作品状态：")]'
-    _TYPE_XPATH = '//p[contains(text(), "作品分类：")]'
-    _SUMMARY_XPATH = 'string(//div[@id="w0-collapse1"]/div)'
-    _VOLUME_NODE_XPATH = (
-        '//div[contains(@class, "panel-info") and contains(@class, "panel-default")]'
-    )
-    _VOLUME_TITLE_XPATH = './/div[contains(@class, "panel-heading")]//a/text()'
-    _CHAPTER_NODE_XPATH = (
-        './/div[contains(@class, "panel-body")]//a[contains(@href, "view-chapter")]'
-    )
-    _CHAPTER_FLAT_XPATH = (
-        '//div[@class="panel-body"]//a[contains(@href, "view-chapter")]'
-    )
-    # Chapter field XPaths
-    _CHAPTER_TITLE_XPATH = "string(//section[contains(@class, 'col-md-9')]//h3)"
-    _CHAPTER_TIME_XPATH = (
-        "//div[contains(@class, 'row')]//div[contains(text(), '更新时间')]"
-    )
-    _CHAPTER_WORD_COUNT_XPATH = (
-        "//div[contains(@class, 'row')]//div[contains(text(), '章节字数')]"
-    )
-    _CHAPTER_CONTENT_XPATH = "//div[@id='w0-collapse1']//p//text()"
     def parse_book_info(
         self,
         html_list: list[str],
         **kwargs: Any,
-    ) -> dict[str, Any]:
-        """
-        Parse a book info page and extract metadata and chapter structure.
-        :param html_list: Raw HTML of the book info page.
-        :return: Parsed metadata and chapter structure as a dictionary.
-        """
+    ) -> BookInfoDict | None:
         if not html_list:
-            return {}
+            return None
         tree = html.fromstring(html_list[0])
-        result: dict[str, Any] = {}
-        result["book_name"] = tree.xpath(self._BOOK_NAME_XPATH).strip()
-        result["author"] = tree.xpath(self._AUTHOR_XPATH).strip()
-        cover = tree.xpath(self._COVER_URL_XPATH)
-        result["cover_url"] = f"{self.BASE_URL}{cover[0]}" if cover else ""
-        update_node = tree.xpath(self._UPDATE_TIME_XPATH)
-        result["update_time"] = (
-            update_node[0].xpath("string()").replace("更新时间：", "").strip()
-            if update_node
-            else ""
+        book_name = self._first_str(
+            tree.xpath('//h3[contains(@class,"col-md-12")]/text()')
         )
-        serial_node = tree.xpath(self._SERIAL_STATUS_XPATH)
-        result["serial_status"] = (
-            serial_node[0].xpath("string()").replace("作品状态：", "").strip()
-            if serial_node
-            else ""
+        author = self._first_str(
+            tree.xpath('//h5[contains(@class,"text-warning")]/text()')
         )
-        type_node = tree.xpath(self._TYPE_XPATH)
-        result["type"] = (
-            type_node[0].xpath("string()").replace("作品分类：", "").strip()
-            if type_node
-            else ""
+        cover_url = self.BASE_URL + self._first_str(
+            tree.xpath('//img[contains(@class,"img-responsive")]/@src')
         )
-        result["summary"] = tree.xpath(self._SUMMARY_XPATH).strip()
-        volumes = []
-        volume_nodes = tree.xpath(self._VOLUME_NODE_XPATH)
-        if volume_nodes:
-            for volume_node in volume_nodes:
-                title_node = volume_node.xpath(self._VOLUME_TITLE_XPATH)
-                volume_name = title_node[0].strip() if title_node else "未命名卷"
-                chapter_nodes = volume_node.xpath(self._CHAPTER_NODE_XPATH)
-                chapters = []
-                for chap in chapter_nodes:
-                    title = chap.xpath("string()").strip()
-                    url = chap.get("href", "")
-                    chapter_id = url.split("id=")[-1] if "id=" in url else ""
-                    chapters.append(
-                        {
-                            "title": title,
-                            "url": url,
-                            "chapterId": chapter_id,
-                        }
+        update_time = self._first_str(
+            tree.xpath('//p[contains(text(),"更新时间：")]/text()'),
+            replaces=[("更新时间：", "")],
+        )
+        serial_status = self._first_str(
+            tree.xpath('//p[contains(text(),"作品状态：")]/text()'),
+            replaces=[("作品状态：", "")],
+        )
+        book_type = self._first_str(
+            tree.xpath('//p[contains(text(),"作品分类：")]/text()'),
+            replaces=[("作品分类：", "")],
+        )
+        summary = self._first_str([tree.xpath('string(//div[@id="w0-collapse1"]/div)')])
+        # volumes & chapters
+        volumes: list[VolumeInfoDict] = []
+        for volume_node in tree.xpath(
+            '//div[contains(@class,"panel-info") and contains(@class,"panel-default")]'
+        ):
+            volume_name = (
+                self._first_str(
+                    volume_node.xpath(
+                        './/div[contains(@class,"panel-heading")]//a/text()'
                     )
-                volumes.append(
-                    {
-                        "volume_name": volume_name,
-                        "chapters": chapters,
-                    }
                 )
+                or "未命名卷"
+            )
+            chapters: list[ChapterInfoDict] = []
+            for chap in volume_node.xpath(
+                './/div[contains(@class,"panel-body")]//a[contains(@href,"view-chapter")]'
+            ):
+                title = self._first_str([chap.xpath("string()")])
+                url = chap.get("href", "")
+                chapter_id = url.split("id=")[-1]
+                chapters.append({"title": title, "url": url, "chapterId": chapter_id})
+            volumes.append({"volume_name": volume_name, "chapters": chapters})
-        else:
-            # fallback: flat list
-            chapter_nodes = tree.xpath(self._CHAPTER_FLAT_XPATH)
+        # fallback: flat chapter list
+        if not volumes:
             chapters = []
-            for chap in chapter_nodes:
-                title = chap.xpath("string()").strip()
+            for chap in tree.xpath(
+                '//div[@class="panel-body"]//a[contains(@href,"view-chapter")]'
+            ):
+                title = self._first_str([chap.xpath("string()")])
                 url = chap.get("href", "")
                 chapter_id = url.split("id=")[-1] if "id=" in url else ""
-                chapters.append(
-                    {
-                        "title": title,
-                        "url": url,
-                        "chapterId": chapter_id,
-                    }
-                )
-            volumes = [
-                {
-                    "volume_name": "单卷",
-                    "chapters": chapters,
-                }
-            ]
+                chapters.append({"title": title, "url": url, "chapterId": chapter_id})
+            volumes = [{"volume_name": "单卷", "chapters": chapters}]
-        result["volumes"] = volumes
-        return result
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "serial_status": serial_status,
+            "tags": [book_type],
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
     def parse_chapter(
         self,
@@ -160,32 +116,32 @@ class YamiboParser(BaseParser):
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
-        """
-        Parse a single chapter page and extract clean text or simplified HTML.
-        :param html_list: Raw HTML of the chapter page.
-        :param chapter_id: Identifier of the chapter being parsed.
-        :return: Cleaned chapter content as plain text or minimal HTML.
-        """
         if not html_list:
             return None
         tree = html.fromstring(html_list[0])
-        content_lines = tree.xpath(self._CHAPTER_CONTENT_XPATH)
-        content = "\n\n".join(line.strip() for line in content_lines if line.strip())
+        content_lines = tree.xpath("//div[@id='w0-collapse1']//p//text()")
+        content = "\n".join(line.strip() for line in content_lines if line.strip())
         if not content:
             return None
-        title = tree.xpath(self._CHAPTER_TITLE_XPATH).strip()
-        update_node = tree.xpath(self._CHAPTER_TIME_XPATH)
-        updated_at = (
-            update_node[0].text.strip().replace("更新时间：", "") if update_node else ""
+        title = self._first_str(
+            [tree.xpath("string(//section[contains(@class,'col-md-9')]//h3)")]
         )
-        word_node = tree.xpath(self._CHAPTER_WORD_COUNT_XPATH)
-        word = word_node[0].text.strip().replace("章节字数：", "") if word_node else ""
-        word_count = int(word) if word.isdigit() else 0
+        updated_at = self._first_str(
+            tree.xpath(
+                "//div[contains(@class,'row')]//div[contains(text(),'更新时间')]/text()"
+            ),
+            replaces=[("更新时间：", "")],
+        )
+        word_str = self._first_str(
+            tree.xpath(
+                "//div[contains(@class,'row')]//div[contains(text(),'章节字数')]/text()"
+            ),
+            replaces=[("章节字数：", "")],
+        )
+        word_count = int(word_str) if word_str.isdigit() else 0
         return {
             "id": chapter_id,

novel_downloader/core/parsers/yibige.py ADDED Viewed

@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.yibige
+------------------------------------
+"""
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["yibige"],
+)
+class YibigeParser(BaseParser):
+    """
+    Parser for 一笔阁 book pages.
+    """
+    ADS = {
+        "首发无广告",
+        "请分享",
+        "读之阁",
+        "小说网",
+        "首发地址",
+        "手机阅读",
+        "一笔阁",
+        "site_con_ad(",
+        "chapter_content(",
+    }
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        if len(html_list) < 2:
+            return None
+        # Parse trees
+        info_tree = html.fromstring(html_list[0])
+        catalog_tree = html.fromstring(html_list[1])
+        # --- From <meta> data ---
+        book_name = self._meta(info_tree, "og:novel:book_name") or self._first_str(
+            info_tree.xpath("//div[@id='info']/h1/text()")
+        )
+        author = self._meta(info_tree, "og:novel:author") or self._first_str(
+            info_tree.xpath("//div[@id='info']/p[a]/a/text()")
+        )
+        cover_url = self._meta(info_tree, "og:image") or self._first_str(
+            info_tree.xpath("//div[@id='fmimg']//img/@src")
+        )
+        update_time = self._meta(info_tree, "og:novel:update_time").replace("T", " ")
+        serial_status = self._meta(info_tree, "og:novel:status") or "连载中"
+        word_count = self._first_str(
+            info_tree.xpath("//div[@id='info']/p[contains(., '字数：')]/text()[1]"),
+            replaces=[("字数：", "")],
+        )
+        # Summary: first paragraph under #intro
+        summary = self._first_str(info_tree.xpath("//div[@id='intro']//p[1]/text()"))
+        # Category and tags
+        book_type = self._meta(info_tree, "og:novel:category")
+        tags_set = set(self._meta_all(info_tree, "book:tag"))
+        if book_type:
+            tags_set.add(book_type)
+        tags = list(tags_set)
+        # --- Chapters from the catalog page ---
+        chapters: list[ChapterInfoDict] = []
+        for a in catalog_tree.xpath("//div[@id='list']/dl/dd/a"):
+            href = (a.get("href") or "").strip()
+            if not href:
+                continue
+            title = (a.text_content() or "").strip()
+            if not title:
+                continue
+            # /6238/2496.html -> 2496
+            chap_id = href.split("/")[-1].split(".")[0]
+            chapters.append({"title": title, "url": href, "chapterId": chap_id})
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "serial_status": serial_status,
+            "word_count": word_count,
+            "summary": summary,
+            "tags": tags,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
+        paragraphs: list[str] = []
+        for p in tree.xpath("//div[@id='content']//p"):
+            txt = self._norm_space(p.text_content())
+            if not txt or self._is_ad(txt):
+                continue
+            paragraphs.append(txt)
+        content = "\n".join(paragraphs).strip()
+        if not content:
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "yibige"},
+        }
+    def _is_ad(self, s: str) -> bool:
+        """
+        Filter for footer junk inside #content.
+        """
+        if self._is_ad_line(s):
+            return True
+        ss = s.replace(" ", "")
+        # return any(b in s or b in ss for b in self.ADS)
+        return self._is_ad_line(ss)
+    @classmethod
+    def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
+        """
+        Get a single meta property content
+        """
+        return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))
+    @staticmethod
+    def _meta_all(tree: html.HtmlElement, prop: str) -> list[str]:
+        """
+        Get all meta property content values
+        """
+        return tree.xpath(f"//meta[@property='{prop}']/@content") or []

novel_downloader/core/searchers/__init__.py CHANGED Viewed

@@ -3,18 +3,49 @@
 novel_downloader.core.searchers
 -------------------------------
+Site-specific searcher implementations for discovering novels across multiple sources
 """
 __all__ = [
     "search",
+    "AaatxtSearcher",
     "BiqugeSearcher",
+    "DxmwxSearcher",
+    "EightnovelSearcher",
     "EsjzoneSearcher",
+    "HetushuSearcher",
+    "I25zwSearcher",
+    "Ixdzs8Searcher",
+    "Jpxs123Searcher",
+    "PiaotiaSearcher",
+    "QbtrSearcher",
     "QianbiSearcher",
-    "QidianSearcher",
+    "Quanben5Searcher",
+    "ShuhaigeSearcher",
+    "TongrenquanSearcher",
+    "TtkanSearcher",
+    "XiaoshuowuSearcher",
+    "XiguashuwuSearcher",
+    "Xs63bSearcher",
 ]
-from .biquge import BiqugeSearcher
+from .aaatxt import AaatxtSearcher
+from .b520 import BiqugeSearcher
+from .dxmwx import DxmwxSearcher
+from .eightnovel import EightnovelSearcher
 from .esjzone import EsjzoneSearcher
+from .hetushu import HetushuSearcher
+from .i25zw import I25zwSearcher
+from .ixdzs8 import Ixdzs8Searcher
+from .jpxs123 import Jpxs123Searcher
+from .piaotia import PiaotiaSearcher
+from .qbtr import QbtrSearcher
 from .qianbi import QianbiSearcher
-from .qidian import QidianSearcher
+from .quanben5 import Quanben5Searcher
 from .registry import search
+from .shuhaige import ShuhaigeSearcher
+from .tongrenquan import TongrenquanSearcher
+from .ttkan import TtkanSearcher
+from .xiaoshuowu import XiaoshuowuSearcher
+from .xiguashuwu import XiguashuwuSearcher
+from .xs63b import Xs63bSearcher

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl