novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.xshbook
4
+ -------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["xshbook"],
24
+ )
25
+ class XshbookParser(BaseParser):
26
+ """Parser for 小说虎 book pages."""
27
+
28
+ BASE = "http://www.xshbook.com"
29
+
30
+ def parse_book_info(
31
+ self,
32
+ html_list: list[str],
33
+ **kwargs: Any,
34
+ ) -> BookInfoDict | None:
35
+ if not html_list:
36
+ return None
37
+
38
+ tree = html.fromstring(html_list[0])
39
+
40
+ book_name = self._first_str(tree.xpath("//div[@id='info']/h1/text()"))
41
+
42
+ author = self._first_str(
43
+ tree.xpath("//div[@id='info']/p[1]/text()"),
44
+ replaces=[("\xa0", ""), ("作者:", "")],
45
+ )
46
+
47
+ update_time = self._first_str(
48
+ tree.xpath("//meta[@property='og:novel:update_time']/@content")
49
+ )
50
+
51
+ summary = "\n".join(
52
+ self._first_str(p.xpath("string()").splitlines())
53
+ for p in tree.xpath("//div[@id='intro']//p")
54
+ ).strip()
55
+ summary = summary.split("本站提示", 1)[0].strip()
56
+
57
+ cover_url = self._first_str(tree.xpath("//div[@id='fmimg']//img/@src"))
58
+
59
+ book_type = self._first_str(tree.xpath("//div[@class='con_top']/a[2]/text()"))
60
+ tags: list[str] = [book_type] if book_type else []
61
+
62
+ chapters: list[ChapterInfoDict] = []
63
+ for a in tree.xpath("//div[@id='list']//dd/a"):
64
+ href = a.get("href", "")
65
+ title = self._norm_space(a.text_content())
66
+ # /95071/95071941/389027455.html -> "389027455"
67
+ chapter_id = href.rsplit("/", 1)[-1].split(".", 1)[0]
68
+ chapters.append({"title": title, "url": href, "chapterId": chapter_id})
69
+
70
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
71
+
72
+ return {
73
+ "book_name": book_name,
74
+ "author": author,
75
+ "cover_url": cover_url,
76
+ "update_time": update_time,
77
+ "summary": summary,
78
+ "tags": tags,
79
+ "volumes": volumes,
80
+ "extra": {},
81
+ }
82
+
83
+ def parse_chapter(
84
+ self,
85
+ html_list: list[str],
86
+ chapter_id: str,
87
+ **kwargs: Any,
88
+ ) -> ChapterDict | None:
89
+ if not html_list:
90
+ return None
91
+ tree = html.fromstring(html_list[0])
92
+
93
+ title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
94
+ if not title:
95
+ title = self._first_str(
96
+ tree.xpath("//div[@class='con_top']/text()[last()]")
97
+ )
98
+
99
+ cont_nodes = tree.xpath("//div[@id='content']")
100
+ if not cont_nodes:
101
+ return None
102
+ cont = cont_nodes[0]
103
+
104
+ # remove scripts under content
105
+ for s in cont.xpath(".//script"):
106
+ s.getparent().remove(s)
107
+
108
+ paragraphs: list[str] = []
109
+ for p in cont.xpath(".//p"):
110
+ text = html.tostring(p, method="text", encoding="unicode")
111
+ text = text.replace("\xa0", " ")
112
+ # filter boilerplate lines
113
+ bad = (
114
+ "谨记我们的网址" in text
115
+ or "温馨提示" in text
116
+ or "提示" in text
117
+ and "本文" not in text
118
+ and len(text) < 60
119
+ or "分享" in text
120
+ and len(text) < 40
121
+ )
122
+ if not bad:
123
+ paragraphs.append(text)
124
+
125
+ content = "\n".join(self._norm_space(p) for p in paragraphs if p.strip())
126
+ if not content.strip():
127
+ return None
128
+
129
+ return {
130
+ "id": chapter_id,
131
+ "title": title,
132
+ "content": content,
133
+ "extra": {"site": "xshbook"},
134
+ }
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.yamibo
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["yamibo"],
24
+ )
25
+ class YamiboParser(BaseParser):
26
+ """
27
+ Parser for 百合会 book pages.
28
+ """
29
+
30
+ BASE_URL = "https://www.yamibo.com"
31
+
32
+ def parse_book_info(
33
+ self,
34
+ html_list: list[str],
35
+ **kwargs: Any,
36
+ ) -> BookInfoDict | None:
37
+ if not html_list:
38
+ return None
39
+
40
+ tree = html.fromstring(html_list[0])
41
+
42
+ book_name = self._first_str(
43
+ tree.xpath('//h3[contains(@class,"col-md-12")]/text()')
44
+ )
45
+ author = self._first_str(
46
+ tree.xpath('//h5[contains(@class,"text-warning")]/text()')
47
+ )
48
+ cover_url = self.BASE_URL + self._first_str(
49
+ tree.xpath('//img[contains(@class,"img-responsive")]/@src')
50
+ )
51
+
52
+ update_time = self._first_str(
53
+ tree.xpath('//p[contains(text(),"更新时间:")]/text()'),
54
+ replaces=[("更新时间:", "")],
55
+ )
56
+ serial_status = self._first_str(
57
+ tree.xpath('//p[contains(text(),"作品状态:")]/text()'),
58
+ replaces=[("作品状态:", "")],
59
+ )
60
+ book_type = self._first_str(
61
+ tree.xpath('//p[contains(text(),"作品分类:")]/text()'),
62
+ replaces=[("作品分类:", "")],
63
+ )
64
+ summary = self._first_str([tree.xpath('string(//div[@id="w0-collapse1"]/div)')])
65
+
66
+ # volumes & chapters
67
+ volumes: list[VolumeInfoDict] = []
68
+ for volume_node in tree.xpath(
69
+ '//div[contains(@class,"panel-info") and contains(@class,"panel-default")]'
70
+ ):
71
+ volume_name = (
72
+ self._first_str(
73
+ volume_node.xpath(
74
+ './/div[contains(@class,"panel-heading")]//a/text()'
75
+ )
76
+ )
77
+ or "未命名卷"
78
+ )
79
+ chapters: list[ChapterInfoDict] = []
80
+ for chap in volume_node.xpath(
81
+ './/div[contains(@class,"panel-body")]//a[contains(@href,"view-chapter")]'
82
+ ):
83
+ title = self._first_str([chap.xpath("string()")])
84
+ url = chap.get("href", "")
85
+ chapter_id = url.split("id=")[-1]
86
+ chapters.append({"title": title, "url": url, "chapterId": chapter_id})
87
+ volumes.append({"volume_name": volume_name, "chapters": chapters})
88
+
89
+ # fallback: flat chapter list
90
+ if not volumes:
91
+ chapters = []
92
+ for chap in tree.xpath(
93
+ '//div[@class="panel-body"]//a[contains(@href,"view-chapter")]'
94
+ ):
95
+ title = self._first_str([chap.xpath("string()")])
96
+ url = chap.get("href", "")
97
+ chapter_id = url.split("id=")[-1] if "id=" in url else ""
98
+ chapters.append({"title": title, "url": url, "chapterId": chapter_id})
99
+ volumes = [{"volume_name": "单卷", "chapters": chapters}]
100
+
101
+ return {
102
+ "book_name": book_name,
103
+ "author": author,
104
+ "cover_url": cover_url,
105
+ "update_time": update_time,
106
+ "serial_status": serial_status,
107
+ "tags": [book_type],
108
+ "summary": summary,
109
+ "volumes": volumes,
110
+ "extra": {},
111
+ }
112
+
113
+ def parse_chapter(
114
+ self,
115
+ html_list: list[str],
116
+ chapter_id: str,
117
+ **kwargs: Any,
118
+ ) -> ChapterDict | None:
119
+ if not html_list:
120
+ return None
121
+ tree = html.fromstring(html_list[0])
122
+
123
+ content_lines = tree.xpath("//div[@id='w0-collapse1']//p//text()")
124
+ content = "\n".join(line.strip() for line in content_lines if line.strip())
125
+ if not content:
126
+ return None
127
+
128
+ title = self._first_str(
129
+ [tree.xpath("string(//section[contains(@class,'col-md-9')]//h3)")]
130
+ )
131
+
132
+ updated_at = self._first_str(
133
+ tree.xpath(
134
+ "//div[contains(@class,'row')]//div[contains(text(),'更新时间')]/text()"
135
+ ),
136
+ replaces=[("更新时间:", "")],
137
+ )
138
+ word_str = self._first_str(
139
+ tree.xpath(
140
+ "//div[contains(@class,'row')]//div[contains(text(),'章节字数')]/text()"
141
+ ),
142
+ replaces=[("章节字数:", "")],
143
+ )
144
+ word_count = int(word_str) if word_str.isdigit() else 0
145
+
146
+ return {
147
+ "id": chapter_id,
148
+ "title": title,
149
+ "content": content,
150
+ "extra": {
151
+ "site": "yamibo",
152
+ "word_count": word_count,
153
+ "updated_at": updated_at,
154
+ },
155
+ }
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.yibige
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
+ @register_parser(
23
+ site_keys=["yibige"],
24
+ )
25
+ class YibigeParser(BaseParser):
26
+ """
27
+ Parser for 一笔阁 book pages.
28
+ """
29
+
30
+ ADS = {
31
+ "首发无广告",
32
+ "请分享",
33
+ "读之阁",
34
+ "小说网",
35
+ "首发地址",
36
+ "手机阅读",
37
+ "一笔阁",
38
+ "site_con_ad(",
39
+ "chapter_content(",
40
+ }
41
+
42
+ def parse_book_info(
43
+ self,
44
+ html_list: list[str],
45
+ **kwargs: Any,
46
+ ) -> BookInfoDict | None:
47
+ if len(html_list) < 2:
48
+ return None
49
+
50
+ # Parse trees
51
+ info_tree = html.fromstring(html_list[0])
52
+ catalog_tree = html.fromstring(html_list[1])
53
+
54
+ # --- From <meta> data ---
55
+ book_name = self._meta(info_tree, "og:novel:book_name") or self._first_str(
56
+ info_tree.xpath("//div[@id='info']/h1/text()")
57
+ )
58
+
59
+ author = self._meta(info_tree, "og:novel:author") or self._first_str(
60
+ info_tree.xpath("//div[@id='info']/p[a]/a/text()")
61
+ )
62
+
63
+ cover_url = self._meta(info_tree, "og:image") or self._first_str(
64
+ info_tree.xpath("//div[@id='fmimg']//img/@src")
65
+ )
66
+
67
+ update_time = self._meta(info_tree, "og:novel:update_time").replace("T", " ")
68
+ serial_status = self._meta(info_tree, "og:novel:status") or "连载中"
69
+
70
+ word_count = self._first_str(
71
+ info_tree.xpath("//div[@id='info']/p[contains(., '字数:')]/text()[1]"),
72
+ replaces=[("字数:", "")],
73
+ )
74
+
75
+ # Summary: first paragraph under #intro
76
+ summary = self._first_str(info_tree.xpath("//div[@id='intro']//p[1]/text()"))
77
+
78
+ # Category and tags
79
+ book_type = self._meta(info_tree, "og:novel:category")
80
+ tags_set = set(self._meta_all(info_tree, "book:tag"))
81
+ if book_type:
82
+ tags_set.add(book_type)
83
+ tags = list(tags_set)
84
+
85
+ # --- Chapters from the catalog page ---
86
+ chapters: list[ChapterInfoDict] = []
87
+ for a in catalog_tree.xpath("//div[@id='list']/dl/dd/a"):
88
+ href = (a.get("href") or "").strip()
89
+ if not href:
90
+ continue
91
+ title = (a.text_content() or "").strip()
92
+ if not title:
93
+ continue
94
+ # /6238/2496.html -> 2496
95
+ chap_id = href.split("/")[-1].split(".")[0]
96
+ chapters.append({"title": title, "url": href, "chapterId": chap_id})
97
+
98
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
99
+
100
+ return {
101
+ "book_name": book_name,
102
+ "author": author,
103
+ "cover_url": cover_url,
104
+ "update_time": update_time,
105
+ "serial_status": serial_status,
106
+ "word_count": word_count,
107
+ "summary": summary,
108
+ "tags": tags,
109
+ "volumes": volumes,
110
+ "extra": {},
111
+ }
112
+
113
+ def parse_chapter(
114
+ self,
115
+ html_list: list[str],
116
+ chapter_id: str,
117
+ **kwargs: Any,
118
+ ) -> ChapterDict | None:
119
+ if not html_list:
120
+ return None
121
+ tree = html.fromstring(html_list[0])
122
+
123
+ title = self._first_str(tree.xpath("//div[@class='bookname']/h1/text()"))
124
+
125
+ paragraphs: list[str] = []
126
+ for p in tree.xpath("//div[@id='content']//p"):
127
+ txt = self._norm_space(p.text_content())
128
+ if not txt or self._is_ad(txt):
129
+ continue
130
+ paragraphs.append(txt)
131
+
132
+ content = "\n".join(paragraphs).strip()
133
+ if not content:
134
+ return None
135
+
136
+ return {
137
+ "id": chapter_id,
138
+ "title": title,
139
+ "content": content,
140
+ "extra": {"site": "yibige"},
141
+ }
142
+
143
+ def _is_ad(self, s: str) -> bool:
144
+ """
145
+ Filter for footer junk inside #content.
146
+ """
147
+ if self._is_ad_line(s):
148
+ return True
149
+
150
+ ss = s.replace(" ", "")
151
+ # return any(b in s or b in ss for b in self.ADS)
152
+ return self._is_ad_line(ss)
153
+
154
+ @classmethod
155
+ def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
156
+ """
157
+ Get a single meta property content
158
+ """
159
+ return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))
160
+
161
+ @staticmethod
162
+ def _meta_all(tree: html.HtmlElement, prop: str) -> list[str]:
163
+ """
164
+ Get all meta property content values
165
+ """
166
+ return tree.xpath(f"//meta[@property='{prop}']/@content") or []
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers
4
+ -------------------------------
5
+
6
+ Site-specific searcher implementations for discovering novels across multiple sources
7
+ """
8
+
9
+ __all__ = [
10
+ "search",
11
+ "AaatxtSearcher",
12
+ "BiqugeSearcher",
13
+ "DxmwxSearcher",
14
+ "EightnovelSearcher",
15
+ "EsjzoneSearcher",
16
+ "HetushuSearcher",
17
+ "I25zwSearcher",
18
+ "Ixdzs8Searcher",
19
+ "Jpxs123Searcher",
20
+ "PiaotiaSearcher",
21
+ "QbtrSearcher",
22
+ "QianbiSearcher",
23
+ "Quanben5Searcher",
24
+ "ShuhaigeSearcher",
25
+ "TongrenquanSearcher",
26
+ "TtkanSearcher",
27
+ "XiaoshuowuSearcher",
28
+ "XiguashuwuSearcher",
29
+ "Xs63bSearcher",
30
+ ]
31
+
32
+ from .aaatxt import AaatxtSearcher
33
+ from .b520 import BiqugeSearcher
34
+ from .dxmwx import DxmwxSearcher
35
+ from .eightnovel import EightnovelSearcher
36
+ from .esjzone import EsjzoneSearcher
37
+ from .hetushu import HetushuSearcher
38
+ from .i25zw import I25zwSearcher
39
+ from .ixdzs8 import Ixdzs8Searcher
40
+ from .jpxs123 import Jpxs123Searcher
41
+ from .piaotia import PiaotiaSearcher
42
+ from .qbtr import QbtrSearcher
43
+ from .qianbi import QianbiSearcher
44
+ from .quanben5 import Quanben5Searcher
45
+ from .registry import search
46
+ from .shuhaige import ShuhaigeSearcher
47
+ from .tongrenquan import TongrenquanSearcher
48
+ from .ttkan import TtkanSearcher
49
+ from .xiaoshuowu import XiaoshuowuSearcher
50
+ from .xiguashuwu import XiguashuwuSearcher
51
+ from .xs63b import Xs63bSearcher
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.aaatxt
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["aaatxt"],
)
class AaatxtSearcher(BaseSearcher):
    """
    Searcher for www.aaatxt.com (a gb2312/gbk-encoded site).
    """

    site_name = "aaatxt"
    priority = 500
    SEARCH_URL = "http://www.aaatxt.com/search.php"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw search-result HTML for *keyword*.

        Returns an empty string when the request fails.
        """
        # The site expects its query parameters in gb2312/gbk, so quote
        # them with that encoding up front.
        query = {
            "keyword": cls._quote(keyword, encoding="gb2312", errors="replace"),
            "submit": cls._quote("搜 索", encoding="gb2312", errors="replace"),
        }
        # Build the full URL by hand so the pre-quoted values are not
        # encoded a second time.
        url = cls._build_url(cls.SEARCH_URL, query)
        headers = {
            "Host": "www.aaatxt.com",
            "Referer": "http://www.aaatxt.com/",
        }
        try:
            async with (await cls._http_get(url, headers=headers)) as resp:
                return await cls._response_to_str(resp, "gb2312")
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse search-result HTML into a list of SearchResult entries.

        :param html_str: raw HTML of the search-result page.
        :param limit: stop once this many rows have been inspected (None = all).
        """
        tree = html.fromstring(html_str)
        tables = tree.xpath("//div[@class='sort']//div[@class='list']/table")
        found: list[SearchResult] = []

        for pos, table in enumerate(tables):
            href = cls._first_str(table.xpath(".//td[@class='name']/h3/a/@href"))
            if not href:
                # Not a real result row; skip it.
                continue

            if limit is not None and pos >= limit:
                break

            # e.g. ".../12345.html" -> "12345"
            book_id = href.split("/")[-1].split(".")[0]
            book_url = cls._abs_url(href)

            cover_rel = cls._first_str(table.xpath(".//td[@class='cover']/a/img/@src"))
            cover_url = cls._abs_url(cover_rel) if cover_rel else ""

            title = cls._first_str(table.xpath(".//td[@class='name']/h3/a//text()"))

            # The "size" cell packs several "label:value" tokens separated by
            # (non-breaking) spaces; normalize those first, then scan.
            raw_size = table.xpath("string(.//td[@class='size'])")
            pieces = (
                raw_size.replace("\u00a0", " ").replace("&nbsp;", " ").strip().split()
            )

            word_count = "-"
            author = "-"
            for piece in pieces:
                if piece.startswith("大小:"):
                    word_count = piece.split(":", 1)[1].strip()
                elif piece.startswith("上传:"):
                    author = piece.split(":", 1)[1].strip()

            # The intro cell may carry an update date after an "更新" marker
            # (either colon variant).
            raw_intro = table.xpath("string(.//td[@class='intro'])")
            intro = raw_intro.replace("\u00a0", " ").replace("&nbsp;", " ")
            update_date = "-"
            for marker in ("更新:", "更新:"):
                if marker in intro:
                    tail = intro.split(marker, 1)[1].strip()
                    if tail:
                        update_date = tail.split()[0]
                    break

            found.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date=update_date,
                    word_count=word_count,
                    priority=cls.priority + pos,
                )
            )
        return found
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.searchers.b520
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.searchers.base import BaseSearcher
13
+ from novel_downloader.core.searchers.registry import register_searcher
14
+ from novel_downloader.models import SearchResult
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@register_searcher(
    site_keys=["biquge", "bqg", "b520"],
)
class BiqugeSearcher(BaseSearcher):
    """
    Searcher for the b520.cc biquge mirror.
    """

    site_name = "biquge"
    priority = 30
    BASE_URL = "http://www.b520.cc/"
    SEARCH_URL = "http://www.b520.cc/modules/article/search.php"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw search-result HTML for *keyword*.

        Returns an empty string when the request fails.
        """
        query = {"searchkey": keyword}
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=query)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse search-result HTML into a list of SearchResult entries.

        The XPath position() filter drops the table's header row; each
        remaining row yields one result.

        :param html_str: raw HTML of the search-result page.
        :param limit: stop once this many rows have been inspected (None = all).
        """
        tree = html.fromstring(html_str)
        rows = tree.xpath('//table[@class="grid"]//tr[position()>1]')
        found: list[SearchResult] = []

        for pos, row in enumerate(rows):
            href = cls._first_str(row.xpath(".//td[1]/a[1]/@href"))
            if not href:
                # Not a real result row; skip it.
                continue

            if limit is not None and pos >= limit:
                break

            found.append(
                SearchResult(
                    site=cls.site_name,
                    # Trailing-slash-safe last path segment is the book id.
                    book_id=href.strip("/").split("/")[-1],
                    book_url=cls._abs_url(href),
                    cover_url="",
                    title=cls._first_str(row.xpath(".//td[1]/a[1]/text()")),
                    author=cls._first_str(row.xpath(".//td[3]//text()")),
                    latest_chapter=cls._first_str(row.xpath(".//td[2]/a[1]/text()"))
                    or "-",
                    update_date=cls._first_str(row.xpath(".//td[5]//text()")),
                    word_count=cls._first_str(row.xpath(".//td[4]//text()")),
                    # Earlier rows rank higher (lower number = higher priority).
                    priority=cls.priority + pos,
                )
            )
        return found