PyPI - novel-downloader - Versions diffs - 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

novel-downloader 1.4.5-py3-none-any.whl → 2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (276) hide show

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +2 -4
novel_downloader/cli/clean.py +21 -88
novel_downloader/cli/config.py +27 -104
novel_downloader/cli/download.py +78 -66
novel_downloader/cli/export.py +20 -21
novel_downloader/cli/main.py +3 -1
novel_downloader/cli/search.py +120 -0
novel_downloader/cli/ui.py +156 -0
novel_downloader/config/__init__.py +10 -14
novel_downloader/config/adapter.py +195 -99
novel_downloader/config/{loader.py → file_io.py} +53 -27
novel_downloader/core/__init__.py +14 -13
novel_downloader/core/archived/deqixs/fetcher.py +115 -0
novel_downloader/core/archived/deqixs/parser.py +132 -0
novel_downloader/core/archived/deqixs/searcher.py +89 -0
novel_downloader/core/archived/qidian/searcher.py +79 -0
novel_downloader/core/archived/wanbengo/searcher.py +98 -0
novel_downloader/core/archived/xshbook/searcher.py +93 -0
novel_downloader/core/downloaders/__init__.py +8 -30
novel_downloader/core/downloaders/base.py +182 -30
novel_downloader/core/downloaders/common.py +217 -384
novel_downloader/core/downloaders/qianbi.py +332 -4
novel_downloader/core/downloaders/qidian.py +250 -290
novel_downloader/core/downloaders/registry.py +69 -0
novel_downloader/core/downloaders/signals.py +46 -0
novel_downloader/core/exporters/__init__.py +8 -26
novel_downloader/core/exporters/base.py +107 -31
novel_downloader/core/exporters/common/__init__.py +3 -4
novel_downloader/core/exporters/common/epub.py +92 -171
novel_downloader/core/exporters/common/main_exporter.py +14 -67
novel_downloader/core/exporters/common/txt.py +90 -86
novel_downloader/core/exporters/epub_util.py +184 -1327
novel_downloader/core/exporters/linovelib/__init__.py +3 -2
novel_downloader/core/exporters/linovelib/epub.py +165 -222
novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
novel_downloader/core/exporters/linovelib/txt.py +76 -66
novel_downloader/core/exporters/qidian.py +15 -11
novel_downloader/core/exporters/registry.py +55 -0
novel_downloader/core/exporters/txt_util.py +67 -0
novel_downloader/core/fetchers/__init__.py +57 -56
novel_downloader/core/fetchers/aaatxt.py +83 -0
novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
novel_downloader/core/fetchers/biquyuedu.py +83 -0
novel_downloader/core/fetchers/dxmwx.py +110 -0
novel_downloader/core/fetchers/eightnovel.py +139 -0
novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
novel_downloader/core/fetchers/guidaye.py +85 -0
novel_downloader/core/fetchers/hetushu.py +92 -0
novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
novel_downloader/core/fetchers/ixdzs8.py +113 -0
novel_downloader/core/fetchers/jpxs123.py +101 -0
novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
novel_downloader/core/fetchers/piaotia.py +105 -0
novel_downloader/core/fetchers/qbtr.py +101 -0
novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
novel_downloader/core/fetchers/quanben5.py +92 -0
novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
novel_downloader/core/fetchers/registry.py +60 -0
novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
novel_downloader/core/fetchers/shencou.py +106 -0
novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
novel_downloader/core/fetchers/tongrenquan.py +84 -0
novel_downloader/core/fetchers/ttkan.py +95 -0
novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
novel_downloader/core/fetchers/xiguashuwu.py +177 -0
novel_downloader/core/fetchers/xs63b.py +171 -0
novel_downloader/core/fetchers/xshbook.py +85 -0
novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
novel_downloader/core/fetchers/yibige.py +114 -0
novel_downloader/core/interfaces/__init__.py +8 -14
novel_downloader/core/interfaces/downloader.py +6 -2
novel_downloader/core/interfaces/exporter.py +7 -7
novel_downloader/core/interfaces/fetcher.py +4 -17
novel_downloader/core/interfaces/parser.py +5 -6
novel_downloader/core/interfaces/searcher.py +26 -0
novel_downloader/core/parsers/__init__.py +58 -22
novel_downloader/core/parsers/aaatxt.py +132 -0
novel_downloader/core/parsers/b520.py +116 -0
novel_downloader/core/parsers/base.py +63 -12
novel_downloader/core/parsers/biquyuedu.py +133 -0
novel_downloader/core/parsers/dxmwx.py +162 -0
novel_downloader/core/parsers/eightnovel.py +224 -0
novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
novel_downloader/core/parsers/guidaye.py +128 -0
novel_downloader/core/parsers/hetushu.py +139 -0
novel_downloader/core/parsers/i25zw.py +137 -0
novel_downloader/core/parsers/ixdzs8.py +186 -0
novel_downloader/core/parsers/jpxs123.py +137 -0
novel_downloader/core/parsers/lewenn.py +142 -0
novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
novel_downloader/core/parsers/piaotia.py +189 -0
novel_downloader/core/parsers/qbtr.py +136 -0
novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
novel_downloader/core/parsers/qidian/__init__.py +2 -2
novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
novel_downloader/core/parsers/qidian/main_parser.py +19 -57
novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
novel_downloader/core/parsers/quanben5.py +103 -0
novel_downloader/core/parsers/registry.py +57 -0
novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
novel_downloader/core/parsers/shencou.py +215 -0
novel_downloader/core/parsers/shuhaige.py +111 -0
novel_downloader/core/parsers/tongrenquan.py +116 -0
novel_downloader/core/parsers/ttkan.py +132 -0
novel_downloader/core/parsers/wanbengo.py +191 -0
novel_downloader/core/parsers/xiaoshuowu.py +173 -0
novel_downloader/core/parsers/xiguashuwu.py +435 -0
novel_downloader/core/parsers/xs63b.py +161 -0
novel_downloader/core/parsers/xshbook.py +134 -0
novel_downloader/core/parsers/yamibo.py +155 -0
novel_downloader/core/parsers/yibige.py +166 -0
novel_downloader/core/searchers/__init__.py +51 -0
novel_downloader/core/searchers/aaatxt.py +107 -0
novel_downloader/core/searchers/b520.py +84 -0
novel_downloader/core/searchers/base.py +168 -0
novel_downloader/core/searchers/dxmwx.py +105 -0
novel_downloader/core/searchers/eightnovel.py +84 -0
novel_downloader/core/searchers/esjzone.py +102 -0
novel_downloader/core/searchers/hetushu.py +92 -0
novel_downloader/core/searchers/i25zw.py +93 -0
novel_downloader/core/searchers/ixdzs8.py +107 -0
novel_downloader/core/searchers/jpxs123.py +107 -0
novel_downloader/core/searchers/piaotia.py +100 -0
novel_downloader/core/searchers/qbtr.py +106 -0
novel_downloader/core/searchers/qianbi.py +165 -0
novel_downloader/core/searchers/quanben5.py +144 -0
novel_downloader/core/searchers/registry.py +79 -0
novel_downloader/core/searchers/shuhaige.py +124 -0
novel_downloader/core/searchers/tongrenquan.py +110 -0
novel_downloader/core/searchers/ttkan.py +92 -0
novel_downloader/core/searchers/xiaoshuowu.py +122 -0
novel_downloader/core/searchers/xiguashuwu.py +95 -0
novel_downloader/core/searchers/xs63b.py +104 -0
novel_downloader/locales/en.json +36 -79
novel_downloader/locales/zh.json +37 -80
novel_downloader/models/__init__.py +23 -50
novel_downloader/models/book.py +44 -0
novel_downloader/models/config.py +16 -43
novel_downloader/models/login.py +1 -1
novel_downloader/models/search.py +21 -0
novel_downloader/resources/config/settings.toml +39 -74
novel_downloader/resources/css_styles/intro.css +83 -0
novel_downloader/resources/css_styles/main.css +30 -89
novel_downloader/resources/json/xiguashuwu.json +718 -0
novel_downloader/utils/__init__.py +43 -0
novel_downloader/utils/chapter_storage.py +247 -226
novel_downloader/utils/constants.py +5 -50
novel_downloader/utils/cookies.py +6 -18
novel_downloader/utils/crypto_utils/__init__.py +13 -0
novel_downloader/utils/crypto_utils/aes_util.py +90 -0
novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
novel_downloader/utils/epub/__init__.py +34 -0
novel_downloader/utils/epub/builder.py +377 -0
novel_downloader/utils/epub/constants.py +118 -0
novel_downloader/utils/epub/documents.py +297 -0
novel_downloader/utils/epub/models.py +120 -0
novel_downloader/utils/epub/utils.py +179 -0
novel_downloader/utils/file_utils/__init__.py +5 -30
novel_downloader/utils/file_utils/io.py +9 -150
novel_downloader/utils/file_utils/normalize.py +2 -2
novel_downloader/utils/file_utils/sanitize.py +2 -7
novel_downloader/utils/fontocr.py +207 -0
novel_downloader/utils/i18n.py +2 -0
novel_downloader/utils/logger.py +10 -16
novel_downloader/utils/network.py +111 -252
novel_downloader/utils/state.py +5 -90
novel_downloader/utils/text_utils/__init__.py +16 -21
novel_downloader/utils/text_utils/diff_display.py +6 -9
novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
novel_downloader/utils/text_utils/text_cleaner.py +179 -0
novel_downloader/utils/text_utils/truncate_utils.py +62 -0
novel_downloader/utils/time_utils/__init__.py +6 -12
novel_downloader/utils/time_utils/datetime_utils.py +23 -33
novel_downloader/utils/time_utils/sleep_utils.py +5 -10
novel_downloader/web/__init__.py +13 -0
novel_downloader/web/components/__init__.py +11 -0
novel_downloader/web/components/navigation.py +35 -0
novel_downloader/web/main.py +66 -0
novel_downloader/web/pages/__init__.py +17 -0
novel_downloader/web/pages/download.py +78 -0
novel_downloader/web/pages/progress.py +147 -0
novel_downloader/web/pages/search.py +329 -0
novel_downloader/web/services/__init__.py +17 -0
novel_downloader/web/services/client_dialog.py +164 -0
novel_downloader/web/services/cred_broker.py +113 -0
novel_downloader/web/services/cred_models.py +35 -0
novel_downloader/web/services/task_manager.py +264 -0
novel_downloader-2.0.0.dist-info/METADATA +171 -0
novel_downloader-2.0.0.dist-info/RECORD +210 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
novel_downloader/config/site_rules.py +0 -94
novel_downloader/core/downloaders/biquge.py +0 -25
novel_downloader/core/downloaders/esjzone.py +0 -25
novel_downloader/core/downloaders/linovelib.py +0 -25
novel_downloader/core/downloaders/sfacg.py +0 -25
novel_downloader/core/downloaders/yamibo.py +0 -25
novel_downloader/core/exporters/biquge.py +0 -25
novel_downloader/core/exporters/esjzone.py +0 -25
novel_downloader/core/exporters/qianbi.py +0 -25
novel_downloader/core/exporters/sfacg.py +0 -25
novel_downloader/core/exporters/yamibo.py +0 -25
novel_downloader/core/factory/__init__.py +0 -20
novel_downloader/core/factory/downloader.py +0 -73
novel_downloader/core/factory/exporter.py +0 -58
novel_downloader/core/factory/fetcher.py +0 -96
novel_downloader/core/factory/parser.py +0 -86
novel_downloader/core/fetchers/base/__init__.py +0 -14
novel_downloader/core/fetchers/base/browser.py +0 -403
novel_downloader/core/fetchers/biquge/__init__.py +0 -14
novel_downloader/core/fetchers/common/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/browser.py +0 -204
novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
novel_downloader/core/fetchers/linovelib/browser.py +0 -193
novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/browser.py +0 -318
novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
novel_downloader/core/fetchers/sfacg/browser.py +0 -189
novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
novel_downloader/core/fetchers/yamibo/browser.py +0 -229
novel_downloader/core/parsers/biquge/__init__.py +0 -10
novel_downloader/core/parsers/biquge/main_parser.py +0 -134
novel_downloader/core/parsers/common/__init__.py +0 -13
novel_downloader/core/parsers/common/helper.py +0 -323
novel_downloader/core/parsers/common/main_parser.py +0 -106
novel_downloader/core/parsers/esjzone/__init__.py +0 -10
novel_downloader/core/parsers/linovelib/__init__.py +0 -10
novel_downloader/core/parsers/qianbi/__init__.py +0 -10
novel_downloader/core/parsers/sfacg/__init__.py +0 -10
novel_downloader/core/parsers/yamibo/__init__.py +0 -10
novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
novel_downloader/models/browser.py +0 -21
novel_downloader/models/chapter.py +0 -25
novel_downloader/models/site_rules.py +0 -99
novel_downloader/models/tasks.py +0 -33
novel_downloader/models/types.py +0 -15
novel_downloader/resources/css_styles/volume-intro.css +0 -56
novel_downloader/resources/json/replace_word_map.json +0 -4
novel_downloader/resources/text/blacklist.txt +0 -22
novel_downloader/tui/__init__.py +0 -7
novel_downloader/tui/app.py +0 -32
novel_downloader/tui/main.py +0 -17
novel_downloader/tui/screens/__init__.py +0 -14
novel_downloader/tui/screens/home.py +0 -198
novel_downloader/tui/screens/login.py +0 -74
novel_downloader/tui/styles/home_layout.tcss +0 -79
novel_downloader/tui/widgets/richlog_handler.py +0 -24
novel_downloader/utils/cache.py +0 -24
novel_downloader/utils/fontocr/__init__.py +0 -22
novel_downloader/utils/fontocr/model_loader.py +0 -69
novel_downloader/utils/fontocr/ocr_v1.py +0 -303
novel_downloader/utils/fontocr/ocr_v2.py +0 -752
novel_downloader/utils/hash_store.py +0 -279
novel_downloader/utils/hash_utils.py +0 -103
novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
novel_downloader/utils/text_utils/font_mapping.py +0 -28
novel_downloader/utils/text_utils/text_cleaning.py +0 -107
novel_downloader-1.4.5.dist-info/METADATA +0 -196
novel_downloader-1.4.5.dist-info/RECORD +0 -165
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0

novel_downloader/core/parsers/ixdzs8.py ADDED Viewed

@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.ixdzs8
+------------------------------------
+"""
+import contextlib
+import json
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["ixdzs8"],
+)
+class Ixdzs8Parser(BaseParser):
+    """
+    Parser for 爱下电子书 (ixdzs8) book pages.
+    Book metadata is read from the info page's Open Graph ``<meta>`` tags with
+    XPath fallbacks; the chapter catalog comes from a separate JSON document.
+    NOTE(review): ``_first_str``, ``_norm_space`` and ``_is_ad_line`` are inherited
+    from ``BaseParser``; their exact semantics are not visible in this file.
+    """
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        """
+        Build book metadata and the chapter list from two fetched documents.
+        :param html_list: ``[info_page_html, chapter_catalog_json]``; both entries
+            must be present and non-empty.
+        :param kwargs: Accepted for interface compatibility; unused here.
+        :return: The parsed book info, or ``None`` when either input is missing.
+        """
+        if len(html_list) < 2 or not html_list[0] or not html_list[1]:
+            return None
+        tree = html.fromstring(html_list[0])
+        book_name = self._meta(tree, "og:novel:book_name") or self._first_str(
+            tree.xpath("//div[@class='n-text']/h1/text()")
+        )
+        author = self._meta(tree, "og:novel:author") or self._first_str(
+            tree.xpath("//div[@class='n-text']//a[contains(@class,'bauthor')]/text()")
+        )
+        cover_url = self._meta(tree, "og:image")
+        if not cover_url:
+            cover_url = self._first_str(tree.xpath("//div[@class='n-img']//img/@src"))
+        serial_status = self._meta(tree, "og:novel:status")
+        # e.g. 2022-08-25T18:08:03+08:00 -> 2022-08-25 18:08:03
+        # (only a "+HH:MM" offset is cut off; other suffixes are kept as-is)
+        iso_time = self._meta(tree, "og:novel:update_time")
+        update_time = ""
+        if iso_time:
+            update_time = iso_time.replace("T", " ").split("+", 1)[0].strip()
+        word_count = self._first_str(
+            tree.xpath("//div[@class='n-text']//span[contains(@class,'nsize')]/text()")
+        )
+        # The description meta carries literal "&nbsp;" and "<br />" markup.
+        raw_summary = self._meta(tree, "og:description")
+        summary = ""
+        if raw_summary:
+            s = raw_summary.replace("&nbsp;", "")
+            s = s.replace("<br />", "\n")
+            summary = "\n".join(
+                self._norm_space(line) for line in s.splitlines()
+            ).strip()
+        tags = [
+            self._norm_space(t)
+            for t in tree.xpath("//div[contains(@class,'tags')]//em/a/text()")
+            if t and t.strip()
+        ]
+        category = self._meta(tree, "og:novel:category") or self._first_str(
+            tree.xpath("//div[@class='n-text']/p[a[contains(@class,'nsort')]]/a/text()")
+        )
+        if category:
+            tags.append(category)
+        # The book id is the last path segment of the read/canonical URL.
+        book_path = self._meta(tree, "og:novel:read_url") or self._meta(tree, "og:url")
+        book_id = ""
+        if book_path:
+            book_id = book_path.strip("/").split("/")[-1]
+        # The catalog is best-effort: malformed JSON yields an empty chapter list
+        # instead of aborting the whole parse.
+        data: Any = {}
+        with contextlib.suppress(TypeError, ValueError, RecursionError):
+            data = json.loads(html_list[1])
+        clist = data.get("data") if isinstance(data, dict) else None
+        if not isinstance(clist, list):
+            # covers a missing key, JSON null and any non-list payload
+            clist = []
+        chapters: list[ChapterInfoDict] = []
+        for chap in clist:
+            if not isinstance(chap, dict):
+                continue
+            raw_order = chap.get("ordernum")
+            # str(None) would otherwise produce the bogus chapter id "pNone"
+            ordernum = "" if raw_order is None else str(raw_order).strip()
+            if not ordernum:
+                continue
+            title = self._norm_space(chap.get("title", "") or "") or "未命名章节"
+            url = f"/read/{book_id}/p{ordernum}.html" if book_id else ""
+            chapters.append(
+                {
+                    "url": url,
+                    "title": title,
+                    "chapterId": f"p{ordernum}",
+                }
+            )
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "serial_status": serial_status,
+            "update_time": update_time,
+            "word_count": word_count,
+            "summary": summary,
+            "tags": tags,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        """
+        Extract the title and body text of a single chapter page.
+        :param html_list: Fetched pages; only the first entry is parsed.
+        :param chapter_id: Identifier stored verbatim in the result's ``id``.
+        :param kwargs: Accepted for interface compatibility; unused here.
+        :return: The chapter dict, or ``None`` when the page is missing/empty or
+            no usable text remains after filtering.
+        """
+        # An empty document makes lxml raise ParserError, so bail out early
+        # (same guard as parse_book_info).
+        if not html_list or not html_list[0]:
+            return None
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(tree.xpath("//div[@class='page-d-top']/h1/text()"))
+        if not title:
+            title = self._first_str(
+                tree.xpath("//article[contains(@class,'page-content')]//h3/text()")
+            )
+        title = self._norm_space(title)
+        # paragraphs within the reading section; skip ad containers (class "abg")
+        ps = tree.xpath(
+            "//article[contains(@class,'page-content')]//section//p[not(contains(@class,'abg'))]"
+        )
+        paragraphs: list[str] = []
+        for p in ps:
+            raw = p.text_content()
+            txt = self._norm_space(raw)
+            if not txt or self._is_ad_line(txt):
+                continue
+            paragraphs.append(txt)
+        if not paragraphs:
+            return None
+        # Strip the chapter title (with and without spaces) from the first
+        # paragraph; drop that paragraph entirely if nothing else is left.
+        first = paragraphs[0].replace(title, "")
+        first = first.replace(title.replace(" ", ""), "").strip()
+        if first:
+            paragraphs[0] = first
+        else:
+            paragraphs.pop(0)
+        # Drop a trailing paragraph containing the "本章完" (end of chapter) marker.
+        if paragraphs:
+            last = paragraphs[-1]
+            if "本章完" in last:
+                paragraphs.pop()
+        content = "\n".join(paragraphs)
+        if not content.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "ixdzs8"},
+        }
+    @classmethod
+    def _meta(cls, tree: html.HtmlElement, prop: str) -> str:
+        """
+        Look up the ``content`` of ``<meta property="{prop}">`` via ``_first_str``.
+        :param tree: Parsed info page.
+        :param prop: Value of the ``property`` attribute, e.g. ``"og:image"``.
+        """
+        return cls._first_str(tree.xpath(f"//meta[@property='{prop}']/@content"))

novel_downloader/core/parsers/jpxs123.py ADDED Viewed

@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.jpxs123
+-------------------------------------
+"""
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["jpxs123"],
+)
+class Jpxs123Parser(BaseParser):
+    """
+    Parser for 精品小说网 (jpxs123) book pages.
+    NOTE(review): ``_first_str`` is inherited from ``BaseParser``; its exact
+    semantics (including the ``replaces`` argument) are not visible in this file.
+    """
+    # Site root, prepended to site-relative cover and download links.
+    BASE_URL = "https://www.jpxs123.com"
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        """
+        Build book metadata and the chapter list from the fetched pages.
+        :param html_list: ``[info_page_html]`` optionally followed by the download
+            page HTML, from which ``extra["download_url"]`` is derived.
+        :param kwargs: Accepted for interface compatibility; unused here.
+        :return: The parsed book info, or ``None`` when the info page is missing.
+        """
+        # An empty document makes lxml raise ParserError, so bail out early.
+        if not html_list or not html_list[0]:
+            return None
+        # Parse the main info page
+        tree = html.fromstring(html_list[0])
+        book_name = self._first_str(tree.xpath('//div[@class="infos"]/h1/text()'))
+        # Tags: the second breadcrumb (e.g., "同人小说")
+        tag = self._first_str(
+            tree.xpath('//div[contains(@class,"menNav")]/a[2]/text()')
+        )
+        tags = [tag] if tag else []
+        author = self._first_str(tree.xpath('//div[@class="date"]/span[1]//a/text()'))
+        update_time = self._first_str(
+            tree.xpath('//div[@class="date"]/span[2]/text()'), replaces=[("时间：", "")]
+        )
+        # Site-relative cover paths are made absolute; empty stays empty.
+        cover_rel = self._first_str(tree.xpath('//div[@class="pic"]/img/@src'))
+        cover_url = (
+            f"{self.BASE_URL}{cover_rel}"
+            if cover_rel and not cover_rel.startswith("http")
+            else cover_rel
+        )
+        # Summary from the <p> inside infos
+        paras = tree.xpath('//div[@class="infos"]/p//text()')
+        summary = "\n".join(p.strip() for p in paras if p.strip())
+        # Chapters from the book_list
+        chapters: list[ChapterInfoDict] = []
+        for a in tree.xpath('//div[contains(@class,"book_list")]//li/a'):
+            url = a.get("href", "").strip()
+            if not url:
+                # no href -> no chapter id could be derived; skip the entry
+                continue
+            title = a.text_content().strip()
+            # expected href shape: /{category}/{bookId}/{chapterId}.html
+            cid = url.split("/")[-1].split(".")[0]
+            chapters.append({"title": title, "url": url, "chapterId": cid})
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        # Parse the download page (second HTML), when it was fetched
+        download_url = ""
+        if len(html_list) > 1 and html_list[1]:
+            dtree = html.fromstring(html_list[1])
+            # "dowloadnUrl" is the element id as queried on the page (sic)
+            anchors = dtree.xpath('//a[@id="dowloadnUrl"]')
+            if anchors:
+                link = anchors[0].get("link") or anchors[0].get("href") or ""
+                download_url = self._fix_download_link(link)
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "tags": tags,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {"download_url": download_url},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        """
+        Extract the title and body text of a single chapter page.
+        :param html_list: Fetched pages; only the first entry is parsed.
+        :param chapter_id: Identifier stored verbatim in the result's ``id``.
+        :param kwargs: Accepted for interface compatibility; unused here.
+        :return: The chapter dict, or ``None`` when the page is missing/empty or
+            contains no paragraph text.
+        """
+        # An empty document makes lxml raise ParserError, so bail out early.
+        if not html_list or not html_list[0]:
+            return None
+        tree = html.fromstring(html_list[0])
+        raw_title = self._first_str(
+            tree.xpath('//div[contains(@class,"read_chapterName")]//h1/text()')
+        )
+        # The last breadcrumb text is treated as the book name and removed from
+        # the heading so that only the chapter title remains.
+        crumbs = tree.xpath('//div[contains(@class,"readTop")]//a/text()')
+        book_name = crumbs[-1].strip() if crumbs else ""
+        title = raw_title.replace(book_name, "").strip()
+        paragraphs = tree.xpath('//div[contains(@class,"read_chapterDetail")]/p')
+        texts = [txt for p in paragraphs if (txt := p.text_content().strip())]
+        content = "\n".join(texts)
+        if not content:
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "jpxs123"},
+        }
+    @classmethod
+    def _fix_download_link(cls, link: str) -> str:
+        """
+        Turn the raw link of the download anchor into an absolute URL.
+        :param link: Raw ``link``/``href`` attribute value; may be empty.
+        :return: ``""`` for an empty link, the link unchanged when it is already
+            absolute, otherwise ``BASE_URL`` followed by the path with
+            ``"xs../"`` rewritten to ``"/e/DownSys/"``.
+        """
+        link = link.strip()
+        if not link:
+            # avoid reporting the bare site root as a download URL
+            return ""
+        if link.startswith("http"):
+            return link
+        true_link = link.replace("xs../", "/e/DownSys/")
+        return f"{cls.BASE_URL}{true_link}"

novel_downloader/core/parsers/lewenn.py ADDED Viewed

@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.lewenn
+------------------------------------
+"""
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["lewenn", "lewen"],
+)
+class LewennParser(BaseParser):
+    """
+    Parser for 乐文小说网 (lewenn) book pages.
+    NOTE(review): ``_first_str`` and ``_is_ad_line`` are inherited from
+    ``BaseParser``; their exact semantics are not visible in this file.
+    """
+    # Site root, prepended to site-relative cover and chapter links.
+    BASE_URL = "https://www.lewenn.net"
+    # Markers of advertisement/boilerplate lines.
+    # NOTE(review): presumably consumed by the inherited ``_is_ad_line`` - confirm.
+    ADS: set[str] = {
+        "app2",
+        "read2",
+        "chaptererror",
+        "记住乐文小说网",
+        "lewenn.net",
+    }
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        """
+        Build book metadata and the chapter list from the book info page.
+        :param html_list: Fetched pages; only the first entry is parsed.
+        :param kwargs: Accepted for interface compatibility; unused here.
+        :return: The parsed book info, or ``None`` when the page is missing/empty.
+        """
+        # An empty document makes lxml raise ParserError, so bail out early.
+        if not html_list or not html_list[0]:
+            return None
+        tree = html.fromstring(html_list[0])
+        # --- Metadata ---
+        # chr(0xA0) is a non-breaking space; it and the field labels are removed.
+        book_name = self._first_str(tree.xpath('//div[@id="info"]/h1/text()'))
+        author = self._first_str(
+            tree.xpath('//div[@id="info"]/p[1]/text()'),
+            replaces=[(chr(0xA0), ""), ("作者：", "")],
+        )
+        serial_status = self._first_str(
+            tree.xpath('//div[@id="info"]/p[2]/text()'),
+            replaces=[(chr(0xA0), ""), ("状态：", "")],
+        )
+        update_time = self._first_str(
+            tree.xpath('//div[@id="info"]/p[3]/text()'),
+            replaces=[("最后更新：", "")],
+        )
+        cover_src = self._first_str(tree.xpath('//div[@id="sidebar"]//img/@src'))
+        # Keep an absent cover empty instead of reporting the bare site root.
+        cover_url = (
+            f"{self.BASE_URL}{cover_src}"
+            if cover_src and not cover_src.startswith("http")
+            else cover_src
+        )
+        summary_lines = tree.xpath('//div[@id="intro"]/p//text()')
+        summary = "\n".join(line.strip() for line in summary_lines).strip()
+        # --- Volumes & Chapters ---
+        chapters: list[ChapterInfoDict] = []
+        for dt in tree.xpath('//div[@class="listmain"]/dl/dt'):
+            title_text = dt.text_content().strip()
+            if "正文" not in title_text:
+                continue
+            # Collect the run of <dd> siblings that follows the first "正文"
+            # heading; only that first matching section is read.
+            sib = dt.getnext()
+            while sib is not None and sib.tag == "dd":
+                anchors = sib.xpath(".//a")
+                href = anchors[0].get("href") if anchors else None
+                # <dd> entries without a usable link are skipped rather than
+                # raising IndexError / AttributeError.
+                if href:
+                    chap_title = anchors[0].text_content().strip()
+                    url = href if href.startswith("http") else f"{self.BASE_URL}{href}"
+                    # removesuffix, not rstrip: rstrip(".html") strips a *set* of
+                    # characters and would eat trailing id letters such as "l".
+                    chap_id = url.removesuffix(".html").split("/")[-1]
+                    chapters.append(
+                        {"title": chap_title, "url": url, "chapterId": chap_id}
+                    )
+                sib = sib.getnext()
+            break
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "serial_status": serial_status,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        """
+        Extract the title and body text of a single chapter page.
+        :param html_list: Fetched pages; only the first entry is parsed.
+        :param chapter_id: Identifier stored verbatim in the result's ``id``.
+        :param kwargs: Accepted for interface compatibility; unused here.
+        :return: The chapter dict, or ``None`` when the page is missing/empty, the
+            content container is absent, or no text remains after filtering.
+        """
+        # An empty document makes lxml raise ParserError, so bail out early.
+        if not html_list or not html_list[0]:
+            return None
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(tree.xpath('//div[@class="content"]/h1/text()'))
+        nodes = tree.xpath('//div[@id="content" and contains(@class,"showtxt")]')
+        if not nodes:
+            return None
+        content_div = nodes[0]
+        raw_lines = [ln.strip() for ln in content_div.xpath(".//text()")]
+        # Drop blank and ad lines, then remove non-breaking spaces (chr(0xA0)).
+        lines: list[str] = [
+            ln.replace(chr(0xA0), "")
+            for ln in raw_lines
+            if ln and not self._is_ad_line(ln)
+        ]
+        content = "\n".join(lines)
+        if not content.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "lewenn"},
+        }

novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} RENAMED Viewed

@@ -1,24 +1,34 @@
 #!/usr/bin/env python3
 """
-novel_downloader.core.parsers.linovelib.main_parser
----------------------------------------------------
+novel_downloader.core.parsers.linovelib
+---------------------------------------
 """
 import json
 from itertools import islice
-from pathlib import PurePosixPath
 from typing import Any
 from lxml import html
 from novel_downloader.core.parsers.base import BaseParser
-from novel_downloader.models import ChapterDict
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
 from novel_downloader.utils.constants import LINOVELIB_FONT_MAP_PATH
+@register_parser(
+    site_keys=["linovelib"],
+)
 class LinovelibParser(BaseParser):
-    """ """
+    """
+    Parser for 哔哩轻小说 book pages.
+    """
     # Book info XPaths
     _BOOK_NAME_XPATH = '//div[@class="book-info"]/h1[@class="book-name"]/text()'
@@ -46,68 +56,69 @@ class LinovelibParser(BaseParser):
         self,
         html_list: list[str],
         **kwargs: Any,
-    ) -> dict[str, Any]:
-        """
-        Parse a book info page and extract metadata and chapter structure.
-        :param html_list: Raw HTML of the book info page.
-        :return: Parsed metadata and chapter structure as a dictionary.
-        """
+    ) -> BookInfoDict | None:
         if not html_list:
-            return {}
-        info_tree = html.fromstring(html_list[0])
-        result: dict[str, Any] = {}
-        result["book_name"] = self._safe_xpath(info_tree, self._BOOK_NAME_XPATH)
-        result["author"] = self._safe_xpath(info_tree, self._AUTHOR_XPATH)
-        result["cover_url"] = self._safe_xpath(info_tree, self._COVER_URL_XPATH)
-        result["update_time"] = self._safe_xpath(
-            info_tree, self._UPDATE_TIME_XPATH, replace=("最后更新：", "")
+            return None
+        tree = html.fromstring(html_list[0])
+        book_name = self._first_str(tree.xpath(self._BOOK_NAME_XPATH))
+        author = self._first_str(tree.xpath(self._AUTHOR_XPATH))
+        cover_url = self._first_str(tree.xpath(self._COVER_URL_XPATH))
+        update_time = self._first_str(
+            tree.xpath(self._UPDATE_TIME_XPATH), replaces=[("最后更新：", "")]
         )
-        result["serial_status"] = self._safe_xpath(info_tree, self._SERIAL_STATUS_XPATH)
-        result["word_count"] = self._safe_xpath(
-            info_tree, self._WORD_COUNT_XPATH, replace=("字数：", "")
+        serial_status = self._first_str(tree.xpath(self._SERIAL_STATUS_XPATH))
+        word_count = self._first_str(
+            tree.xpath(self._WORD_COUNT_XPATH), replaces=[("字数：", "")]
+        )
-        result["summary"] = self._extract_intro(info_tree, self._SUMMARY_XPATH)
+        summary = self._extract_intro(tree, self._SUMMARY_XPATH)
         vol_pages = html_list[1:]
-        volumes: list[dict[str, Any]] = []
+        volumes: list[VolumeInfoDict] = []
         for vol_page in vol_pages:
             vol_tree = html.fromstring(vol_page)
-            volume_cover = self._safe_xpath(vol_tree, self._COVER_URL_XPATH)
-            volume_name = self._safe_xpath(vol_tree, self._BOOK_NAME_XPATH)
-            update_time = self._safe_xpath(
-                vol_tree, self._UPDATE_TIME_XPATH, replace=("最后更新：", "")
+            volume_cover = self._first_str(vol_tree.xpath(self._COVER_URL_XPATH))
+            volume_name = self._first_str(vol_tree.xpath(self._BOOK_NAME_XPATH))
+            vol_update_time = self._first_str(
+                vol_tree.xpath(self._UPDATE_TIME_XPATH), replaces=[("最后更新：", "")]
             )
-            word_count = self._safe_xpath(
-                vol_tree, self._WORD_COUNT_XPATH, replace=("字数：", "")
+            vol_word_count = self._first_str(
+                vol_tree.xpath(self._WORD_COUNT_XPATH), replaces=[("字数：", "")]
             )
             volume_intro = self._extract_intro(vol_tree, self._SUMMARY_XPATH)
-            chapters = []
+            chapters: list[ChapterInfoDict] = []
             chapter_elements = vol_tree.xpath(self._CHAPTERS_XPATH)
             for a in chapter_elements:
                 title = a.text.strip()
                 url = a.attrib.get("href", "").strip()
-                chap_path = PurePosixPath(url.rstrip("/"))
-                chapters.append(
-                    {"title": title, "url": url, "chapterId": chap_path.stem}
-                )
+                # '/novel/4668/276082.html' -> '276082'
+                cid = url.split("/")[-1].split(".")[0]
+                chapters.append({"title": title, "url": url, "chapterId": cid})
             volumes.append(
                 {
                     "volume_name": volume_name,
                     "volume_cover": volume_cover,
-                    "update_time": update_time,
-                    "word_count": word_count,
+                    "update_time": vol_update_time,
+                    "word_count": vol_word_count,
                     "volume_intro": volume_intro,
                     "chapters": chapters,
                 }
             )
-        result["volumes"] = volumes
-        return result
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "serial_status": serial_status,
+            "word_count": word_count,
+            "summary": summary,
+            "update_time": update_time,
+            "volumes": volumes,
+            "extra": {},
+        }
     def parse_chapter(
         self,
@@ -115,13 +126,6 @@ class LinovelibParser(BaseParser):
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
-        """
-        Parse chapter pages and extract clean text or simplified HTML.
-        :param html_list: Raw HTML of the chapter page.
-        :param chapter_id: Identifier of the chapter being parsed.
-        :return: Cleaned chapter content as plain text or minimal HTML.
-        """
         if not html_list:
             return None
         title_text: str = ""
@@ -165,25 +169,10 @@ class LinovelibParser(BaseParser):
         return {
             "id": chapter_id,
             "title": title_text,
-            "content": "\n\n".join(contents),
+            "content": "\n".join(contents),
             "extra": {"site": "linovelib"},
         }
-    def _safe_xpath(
-        self,
-        tree: html.HtmlElement,
-        path: str,
-        replace: tuple[str, str] | None = None,
-    ) -> str:
-        result = tree.xpath(path)
-        if not result:
-            return ""
-        value: str = result[0].strip()
-        if replace:
-            old, new = replace
-            value = value.replace(old, new)
-        return value
     @staticmethod
     def _extract_intro(tree: html.HtmlElement, xpath: str) -> str:
         paragraphs = tree.xpath(xpath.replace("//text()", ""))
@@ -192,7 +181,7 @@ class LinovelibParser(BaseParser):
             text_segments = p.xpath(".//text()")
             cleaned = [seg.strip() for seg in text_segments if seg.strip()]
             lines.append("\n".join(cleaned))
-        return "\n\n".join(lines)
+        return "\n".join(lines)
     @staticmethod
     def _is_encrypted(html: str) -> bool:

novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl