PyPI - novel-downloader - Versions diffs - 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl - Mend

novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (248) hide show

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +1 -3
novel_downloader/cli/clean.py +21 -88
novel_downloader/cli/config.py +26 -21
novel_downloader/cli/download.py +79 -66
novel_downloader/cli/export.py +17 -21
novel_downloader/cli/main.py +1 -1
novel_downloader/cli/search.py +62 -65
novel_downloader/cli/ui.py +156 -0
novel_downloader/config/__init__.py +8 -5
novel_downloader/config/adapter.py +206 -209
novel_downloader/config/{loader.py → file_io.py} +53 -26
novel_downloader/core/__init__.py +5 -5
novel_downloader/core/archived/deqixs/fetcher.py +115 -0
novel_downloader/core/archived/deqixs/parser.py +132 -0
novel_downloader/core/archived/deqixs/searcher.py +89 -0
novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
novel_downloader/core/archived/wanbengo/searcher.py +98 -0
novel_downloader/core/archived/xshbook/searcher.py +93 -0
novel_downloader/core/downloaders/__init__.py +3 -24
novel_downloader/core/downloaders/base.py +49 -23
novel_downloader/core/downloaders/common.py +191 -137
novel_downloader/core/downloaders/qianbi.py +187 -146
novel_downloader/core/downloaders/qidian.py +187 -141
novel_downloader/core/downloaders/registry.py +4 -2
novel_downloader/core/downloaders/signals.py +46 -0
novel_downloader/core/exporters/__init__.py +3 -20
novel_downloader/core/exporters/base.py +33 -37
novel_downloader/core/exporters/common/__init__.py +1 -2
novel_downloader/core/exporters/common/epub.py +15 -10
novel_downloader/core/exporters/common/main_exporter.py +19 -12
novel_downloader/core/exporters/common/txt.py +17 -12
novel_downloader/core/exporters/epub_util.py +59 -29
novel_downloader/core/exporters/linovelib/__init__.py +1 -0
novel_downloader/core/exporters/linovelib/epub.py +23 -25
novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
novel_downloader/core/exporters/linovelib/txt.py +20 -14
novel_downloader/core/exporters/qidian.py +2 -8
novel_downloader/core/exporters/registry.py +4 -2
novel_downloader/core/exporters/txt_util.py +7 -7
novel_downloader/core/fetchers/__init__.py +54 -48
novel_downloader/core/fetchers/aaatxt.py +83 -0
novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
novel_downloader/core/fetchers/dxmwx.py +110 -0
novel_downloader/core/fetchers/eightnovel.py +139 -0
novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
novel_downloader/core/fetchers/guidaye.py +85 -0
novel_downloader/core/fetchers/hetushu.py +92 -0
novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
novel_downloader/core/fetchers/ixdzs8.py +113 -0
novel_downloader/core/fetchers/jpxs123.py +101 -0
novel_downloader/core/fetchers/lewenn.py +83 -0
novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
novel_downloader/core/fetchers/piaotia.py +105 -0
novel_downloader/core/fetchers/qbtr.py +101 -0
novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
novel_downloader/core/fetchers/quanben5.py +92 -0
novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
novel_downloader/core/fetchers/registry.py +5 -16
novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
novel_downloader/core/fetchers/shencou.py +106 -0
novel_downloader/core/fetchers/shuhaige.py +84 -0
novel_downloader/core/fetchers/tongrenquan.py +84 -0
novel_downloader/core/fetchers/ttkan.py +95 -0
novel_downloader/core/fetchers/wanbengo.py +83 -0
novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
novel_downloader/core/fetchers/xiguashuwu.py +177 -0
novel_downloader/core/fetchers/xs63b.py +171 -0
novel_downloader/core/fetchers/xshbook.py +85 -0
novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
novel_downloader/core/fetchers/yibige.py +114 -0
novel_downloader/core/interfaces/__init__.py +1 -9
novel_downloader/core/interfaces/downloader.py +6 -2
novel_downloader/core/interfaces/exporter.py +7 -7
novel_downloader/core/interfaces/fetcher.py +6 -19
novel_downloader/core/interfaces/parser.py +7 -8
novel_downloader/core/interfaces/searcher.py +9 -1
novel_downloader/core/parsers/__init__.py +49 -12
novel_downloader/core/parsers/aaatxt.py +132 -0
novel_downloader/core/parsers/b520.py +116 -0
novel_downloader/core/parsers/base.py +64 -12
novel_downloader/core/parsers/biquyuedu.py +133 -0
novel_downloader/core/parsers/dxmwx.py +162 -0
novel_downloader/core/parsers/eightnovel.py +224 -0
novel_downloader/core/parsers/esjzone.py +64 -69
novel_downloader/core/parsers/guidaye.py +128 -0
novel_downloader/core/parsers/hetushu.py +139 -0
novel_downloader/core/parsers/i25zw.py +137 -0
novel_downloader/core/parsers/ixdzs8.py +186 -0
novel_downloader/core/parsers/jpxs123.py +137 -0
novel_downloader/core/parsers/lewenn.py +142 -0
novel_downloader/core/parsers/linovelib.py +48 -64
novel_downloader/core/parsers/piaotia.py +189 -0
novel_downloader/core/parsers/qbtr.py +136 -0
novel_downloader/core/parsers/qianbi.py +48 -50
novel_downloader/core/parsers/qidian/main_parser.py +756 -48
novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
novel_downloader/core/parsers/quanben5.py +103 -0
novel_downloader/core/parsers/registry.py +5 -16
novel_downloader/core/parsers/sfacg.py +38 -45
novel_downloader/core/parsers/shencou.py +215 -0
novel_downloader/core/parsers/shuhaige.py +111 -0
novel_downloader/core/parsers/tongrenquan.py +116 -0
novel_downloader/core/parsers/ttkan.py +132 -0
novel_downloader/core/parsers/wanbengo.py +191 -0
novel_downloader/core/parsers/xiaoshuowu.py +173 -0
novel_downloader/core/parsers/xiguashuwu.py +429 -0
novel_downloader/core/parsers/xs63b.py +161 -0
novel_downloader/core/parsers/xshbook.py +134 -0
novel_downloader/core/parsers/yamibo.py +87 -131
novel_downloader/core/parsers/yibige.py +166 -0
novel_downloader/core/searchers/__init__.py +34 -3
novel_downloader/core/searchers/aaatxt.py +107 -0
novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
novel_downloader/core/searchers/base.py +112 -36
novel_downloader/core/searchers/dxmwx.py +105 -0
novel_downloader/core/searchers/eightnovel.py +84 -0
novel_downloader/core/searchers/esjzone.py +43 -25
novel_downloader/core/searchers/hetushu.py +92 -0
novel_downloader/core/searchers/i25zw.py +93 -0
novel_downloader/core/searchers/ixdzs8.py +107 -0
novel_downloader/core/searchers/jpxs123.py +107 -0
novel_downloader/core/searchers/piaotia.py +100 -0
novel_downloader/core/searchers/qbtr.py +106 -0
novel_downloader/core/searchers/qianbi.py +74 -40
novel_downloader/core/searchers/quanben5.py +144 -0
novel_downloader/core/searchers/registry.py +24 -8
novel_downloader/core/searchers/shuhaige.py +124 -0
novel_downloader/core/searchers/tongrenquan.py +110 -0
novel_downloader/core/searchers/ttkan.py +92 -0
novel_downloader/core/searchers/xiaoshuowu.py +122 -0
novel_downloader/core/searchers/xiguashuwu.py +95 -0
novel_downloader/core/searchers/xs63b.py +104 -0
novel_downloader/locales/en.json +34 -85
novel_downloader/locales/zh.json +35 -86
novel_downloader/models/__init__.py +21 -22
novel_downloader/models/book.py +44 -0
novel_downloader/models/config.py +4 -37
novel_downloader/models/login.py +1 -1
novel_downloader/models/search.py +5 -0
novel_downloader/resources/config/settings.toml +8 -70
novel_downloader/resources/json/xiguashuwu.json +718 -0
novel_downloader/utils/__init__.py +13 -24
novel_downloader/utils/chapter_storage.py +5 -5
novel_downloader/utils/constants.py +4 -31
novel_downloader/utils/cookies.py +38 -35
novel_downloader/utils/crypto_utils/__init__.py +7 -0
novel_downloader/utils/crypto_utils/aes_util.py +90 -0
novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
novel_downloader/utils/crypto_utils/rc4.py +54 -0
novel_downloader/utils/epub/__init__.py +3 -4
novel_downloader/utils/epub/builder.py +6 -6
novel_downloader/utils/epub/constants.py +62 -21
novel_downloader/utils/epub/documents.py +95 -201
novel_downloader/utils/epub/models.py +8 -22
novel_downloader/utils/epub/utils.py +73 -106
novel_downloader/utils/file_utils/__init__.py +2 -23
novel_downloader/utils/file_utils/io.py +53 -188
novel_downloader/utils/file_utils/normalize.py +1 -7
novel_downloader/utils/file_utils/sanitize.py +4 -15
novel_downloader/utils/fontocr/__init__.py +5 -14
novel_downloader/utils/fontocr/core.py +216 -0
novel_downloader/utils/fontocr/loader.py +50 -0
novel_downloader/utils/logger.py +81 -65
novel_downloader/utils/network.py +17 -41
novel_downloader/utils/state.py +4 -90
novel_downloader/utils/text_utils/__init__.py +1 -7
novel_downloader/utils/text_utils/diff_display.py +5 -7
novel_downloader/utils/text_utils/text_cleaner.py +39 -30
novel_downloader/utils/text_utils/truncate_utils.py +3 -14
novel_downloader/utils/time_utils/__init__.py +5 -11
novel_downloader/utils/time_utils/datetime_utils.py +20 -29
novel_downloader/utils/time_utils/sleep_utils.py +55 -49
novel_downloader/web/__init__.py +13 -0
novel_downloader/web/components/__init__.py +11 -0
novel_downloader/web/components/navigation.py +35 -0
novel_downloader/web/main.py +66 -0
novel_downloader/web/pages/__init__.py +17 -0
novel_downloader/web/pages/download.py +78 -0
novel_downloader/web/pages/progress.py +147 -0
novel_downloader/web/pages/search.py +329 -0
novel_downloader/web/services/__init__.py +17 -0
novel_downloader/web/services/client_dialog.py +164 -0
novel_downloader/web/services/cred_broker.py +113 -0
novel_downloader/web/services/cred_models.py +35 -0
novel_downloader/web/services/task_manager.py +264 -0
novel_downloader-2.0.1.dist-info/METADATA +172 -0
novel_downloader-2.0.1.dist-info/RECORD +206 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
novel_downloader/core/downloaders/biquge.py +0 -29
novel_downloader/core/downloaders/esjzone.py +0 -29
novel_downloader/core/downloaders/linovelib.py +0 -29
novel_downloader/core/downloaders/sfacg.py +0 -29
novel_downloader/core/downloaders/yamibo.py +0 -29
novel_downloader/core/exporters/biquge.py +0 -22
novel_downloader/core/exporters/esjzone.py +0 -22
novel_downloader/core/exporters/qianbi.py +0 -22
novel_downloader/core/exporters/sfacg.py +0 -22
novel_downloader/core/exporters/yamibo.py +0 -22
novel_downloader/core/fetchers/base/__init__.py +0 -14
novel_downloader/core/fetchers/base/browser.py +0 -422
novel_downloader/core/fetchers/biquge/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
novel_downloader/core/fetchers/esjzone/browser.py +0 -209
novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
novel_downloader/core/fetchers/linovelib/browser.py +0 -198
novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/__init__.py +0 -14
novel_downloader/core/fetchers/qidian/browser.py +0 -326
novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
novel_downloader/core/fetchers/sfacg/browser.py +0 -194
novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
novel_downloader/core/fetchers/yamibo/browser.py +0 -234
novel_downloader/core/parsers/biquge.py +0 -139
novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
novel_downloader/models/chapter.py +0 -25
novel_downloader/models/types.py +0 -13
novel_downloader/tui/__init__.py +0 -7
novel_downloader/tui/app.py +0 -32
novel_downloader/tui/main.py +0 -17
novel_downloader/tui/screens/__init__.py +0 -14
novel_downloader/tui/screens/home.py +0 -198
novel_downloader/tui/screens/login.py +0 -74
novel_downloader/tui/styles/home_layout.tcss +0 -79
novel_downloader/tui/widgets/richlog_handler.py +0 -24
novel_downloader/utils/cache.py +0 -24
novel_downloader/utils/crypto_utils.py +0 -71
novel_downloader/utils/fontocr/hash_store.py +0 -280
novel_downloader/utils/fontocr/hash_utils.py +0 -103
novel_downloader/utils/fontocr/model_loader.py +0 -69
novel_downloader/utils/fontocr/ocr_v1.py +0 -315
novel_downloader/utils/fontocr/ocr_v2.py +0 -764
novel_downloader/utils/fontocr/ocr_v3.py +0 -744
novel_downloader-1.5.0.dist-info/METADATA +0 -196
novel_downloader-1.5.0.dist-info/RECORD +0 -164
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0

novel_downloader/core/parsers/esjzone.py CHANGED Viewed

@@ -12,26 +12,20 @@ from lxml import html
 from novel_downloader.core.parsers.base import BaseParser
 from novel_downloader.core.parsers.registry import register_parser
-from novel_downloader.models import ChapterDict
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    VolumeInfoDict,
+)
 @register_parser(
     site_keys=["esjzone"],
-    backends=["session", "browser"],
 )
 class EsjzoneParser(BaseParser):
-    """ """
-    # Book info XPaths
-    _BOOK_NAME_XPATH = '//h2[contains(@class, "text-normal")]/text()'
-    _AUTHOR_XPATH = '//li[strong[text()="作者:"]]/a/text()'
-    _COVER_URL_XPATH = '//div[contains(@class,"product-gallery")]//img/@src'
-    _UPDATE_TIME_XPATH = '//li[strong[text()="更新日期:"]]/text()'
-    _WORD_COUNT_XPATH = '//span[@id="txt"]/text()'
-    _TYPE_XPATH = '//li[strong[text()="類型:"]]/text()'
-    _ALT_NAME_XPATH = '//li[strong[text()="其他書名:"]]/text()'
-    _WEB_URL_XPATH = '//li[strong[text()="Web生肉:"]]/a/@href'
-    _SUMMARY_XPATH = '//div[@class="description"]/p//text()'
+    """
+    Parser for esjzone book pages.
+    """
     # Chapter XPaths
     _CHAPTER_TEXT_XPATH = 'string(//div[contains(@class, "forum-content")])'
@@ -40,14 +34,13 @@ class EsjzoneParser(BaseParser):
         '//i[contains(@class, "icon-clock")]/following-sibling::text()',
         '//i[contains(@class, "icon-pen-tool")]/following-sibling::text()',
     ]
     _CHECK_FORUM_XPATH = '//div[@class="page-title"]//ul[@class="breadcrumbs"]/li[not(@class="slash")]//text()'  # noqa: E501
     def parse_book_info(
         self,
         html_list: list[str],
         **kwargs: Any,
-    ) -> dict[str, Any]:
+    ) -> BookInfoDict | None:
         """
         Parse a book info page and extract metadata and chapter structure.
@@ -58,27 +51,40 @@ class EsjzoneParser(BaseParser):
         :return: Parsed metadata and chapter structure as a dictionary.
         """
         if not html_list or self._is_forum_page(html_list):
-            return {}
+            return None
         tree = html.fromstring(html_list[0])
-        result: dict[str, Any] = {}
-        result["book_name"] = self._get_text(tree, self._BOOK_NAME_XPATH)
-        result["author"] = self._get_text(tree, self._AUTHOR_XPATH)
-        result["cover_url"] = self._get_text(tree, self._COVER_URL_XPATH)
-        result["update_time"] = self._get_text(tree, self._UPDATE_TIME_XPATH)
-        result["word_count"] = self._get_text(
-            tree, self._WORD_COUNT_XPATH, clean_comma=True
+        # --- Basic metadata ---
+        book_name = self._first_str(
+            tree.xpath('//h2[contains(@class,"text-normal")]/text()')
         )
-        result["type"] = self._get_text(tree, self._TYPE_XPATH)
-        result["alt_name"] = self._get_text(tree, self._ALT_NAME_XPATH)
-        result["web_url"] = self._get_text(tree, self._WEB_URL_XPATH)
-        # result["summary"] = self._get_text(tree, self._SUMMARY_XPATH, join=True)
+        author = self._first_str(tree.xpath('//li[strong[text()="作者:"]]/a/text()'))
+        cover_url = self._first_str(
+            tree.xpath('//div[contains(@class,"product-gallery")]//img/@src')
+        )
+        update_time = self._first_str(
+            tree.xpath('//li[strong[text()="更新日期:"]]/text()')
+        )  # noqa: E501
+        word_count = self._first_str(
+            tree.xpath('//span[@id="txt"]/text()'), replaces=[(",", "")]
+        )
+        book_type = self._first_str(tree.xpath('//li[strong[text()="類型:"]]/text()'))
+        alt_name = self._first_str(
+            tree.xpath('//li[strong[text()="其他書名:"]]/text()')
+        )  # noqa: E501
+        web_url = self._first_str(tree.xpath('//li[strong[text()="Web生肉:"]]/a/@href'))
+        # Summary paragraphs
         paras = tree.xpath('//div[@class="description"]/p')
         texts = [p.xpath("string()").strip() for p in paras]
-        result["summary"] = "\n".join(texts).strip()
+        summary = "\n".join(t for t in texts if t)
-        volumes: list[dict[str, Any]] = []
-        current_vol: dict[str, Any] = {}
+        current_vol: VolumeInfoDict = {
+            "volume_name": "單卷",
+            "chapters": [],
+        }
+        volumes: list[VolumeInfoDict] = [current_vol]
         def _is_garbage_title(name: str) -> bool:
             stripped = name.strip()
@@ -89,25 +95,18 @@ class EsjzoneParser(BaseParser):
             if _is_garbage_title(name):
                 return
             name = name.strip() or "未命名卷"
-            if name == "未命名卷" and current_vol is not None:
+            if current_vol and current_vol["volume_name"] == name:
                 return
             current_vol = {"volume_name": name, "chapters": []}
             volumes.append(current_vol)
-        _start_volume("單卷")
-        # nodes = tree.xpath('//div[@id="chapterList"]/details') + tree.xpath(
-        #     '//div[@id="chapterList"]/*[not(self::details)]'
-        # )
         nodes = tree.xpath('//div[@id="chapterList"]/*')
         for node in nodes:
             tag = node.tag.lower()
             if tag == "details":
                 # ---- DETAILS-based layout ----
-                summary = node.find("summary")
-                vol_name = summary.text if summary is not None else "未命名卷"
+                vol_name = node.xpath("string(./summary)").strip() or "未命名卷"
                 _start_volume(vol_name)
                 # all chapters inside this details
@@ -116,7 +115,11 @@ class EsjzoneParser(BaseParser):
                     href = a.get("href", "")
                     chap_id = href.rstrip("/").split("/")[-1].split(".", 1)[0]
                     current_vol["chapters"].append(
-                        {"title": title, "url": href, "chapterId": chap_id}
+                        {
+                            "title": title,
+                            "url": href,
+                            "chapterId": chap_id,
+                        }
                     )
             elif (
@@ -125,9 +128,9 @@ class EsjzoneParser(BaseParser):
                 or tag == "summary"
             ):
                 # Handle possible volume title markers:
-                # - <h2>: standard volume header
-                # - <p class="non">: alternative volume header style
-                # - <summary>: fallback for stray <summary> tags outside <details>
+                # * <h2>: standard volume header
+                # * <p class="non">: alternative volume header style
+                # * <summary>: fallback for stray <summary> tags outside <details>
                 _start_volume(node.xpath("string()"))
             elif tag == "a":
@@ -139,9 +142,21 @@ class EsjzoneParser(BaseParser):
                     {"title": title, "url": href, "chapterId": chap_id}
                 )
         volumes = [vol for vol in volumes if vol["chapters"]]
-        result["volumes"] = volumes
-        return result
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "summary": summary,
+            "tags": [book_type],
+            "word_count": word_count,
+            "volumes": volumes,
+            "extra": {
+                "alt_name": alt_name,
+                "web_url": web_url,
+            },
+        }
     def parse_chapter(
         self,
@@ -149,16 +164,9 @@ class EsjzoneParser(BaseParser):
         chapter_id: str,
         **kwargs: Any,
     ) -> ChapterDict | None:
-        """
-        Parse a single chapter page and extract clean text or simplified HTML.
-        :param html_list: Raw HTML of the chapter page.
-        :param chapter_id: Identifier of the chapter being parsed.
-        :return: Cleaned chapter content as plain text or minimal HTML.
-        """
         if not html_list or self._is_forum_page(html_list):
             return None
-        tree = html.fromstring(html_list[0], parser=None)
+        tree = html.fromstring(html_list[0])
         content_lines: list[str] = []
         content_nodes = tree.xpath(self._CHAPTER_CONTENT_NODES_XPATH)
@@ -178,7 +186,7 @@ class EsjzoneParser(BaseParser):
                     content_lines.append(f'<img src="{src}" />')
         content = (
-            "\n\n".join(content_lines).strip()
+            "\n".join(content_lines).strip()
             if content_lines
             else tree.xpath(self._CHAPTER_TEXT_XPATH).strip()
         )
@@ -216,16 +224,3 @@ class EsjzoneParser(BaseParser):
         breadcrumb: list[str] = tree.xpath(self._CHECK_FORUM_XPATH)
         breadcrumb = [s.strip() for s in breadcrumb if s.strip()]
         return breadcrumb == ["Home", "論壇"]
-    @staticmethod
-    def _get_text(
-        tree: html.HtmlElement,
-        xpath: str,
-        join: bool = False,
-        clean_comma: bool = False,
-    ) -> str:
-        data = tree.xpath(xpath)
-        if not data:
-            return ""
-        text = "\n".join(data) if join else data[0].strip()
-        return text.replace(",", "") if clean_comma else text

novel_downloader/core/parsers/guidaye.py ADDED Viewed

@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.guidaye
+-------------------------------------
+"""
+import re
+from datetime import datetime
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["guidaye"],
+)
+class GuidayeParser(BaseParser):
+    """
+    Parser for guidaye (名著阅读) book-info and chapter pages.
+    """
+    BASE_URL = "https://b.guidaye.com"
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        """
+        Parse a book info page and extract metadata and chapter structure.
+        :param html_list: Raw HTML pages; only the first entry is parsed.
+        :return: Parsed metadata and volume list, or None if html_list is empty.
+        """
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        # Book metadata
+        book_name = self._first_str(tree.xpath('//h1[@class="page-title"]/a/text()'))
+        author = self._first_str(
+            tree.xpath('//div[@id="category-description-author"]/a/text()')
+        )
+        cover_url = self._first_str(
+            tree.xpath('//div[@id="category-description-image"]//img/@src')
+        )
+        # Only prefix the site root onto a non-empty, non-absolute image path;
+        # otherwise a missing cover would turn into the bare site URL and an
+        # absolute src would be mangled.
+        # NOTE(review): assumes _first_str yields "" when nothing matches.
+        if cover_url and not cover_url.startswith(("http://", "https://")):
+            cover_url = self.BASE_URL + cover_url
+        # Summary text with the leading "内容简介：" label removed
+        summary = (
+            tree.xpath('string(//div[@id="category-description-text"])')
+            .replace("内容简介：", "", 1)
+            .strip()
+        )
+        # Chapter volumes & listings: <h3> opens a volume, <li> holds a chapter
+        volumes: list[VolumeInfoDict] = []
+        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
+        items = tree.xpath('//div[@class="entry-content"]/ul/*')
+        for elem in items:
+            tag = elem.tag.lower()
+            if tag == "h3":
+                # Flush previous volume (volumes without chapters are dropped)
+                if curr_vol["chapters"]:
+                    volumes.append(curr_vol)
+                curr_vol = {"volume_name": elem.text_content().strip(), "chapters": []}
+            elif tag == "li":
+                links = elem.xpath(".//a")
+                if not links:
+                    # An <li> without a link carries no chapter; skip it
+                    # instead of failing on the missing anchor.
+                    continue
+                link = links[0]
+                href = link.get("href", "").strip()
+                # Prefer the title attribute, fall back to the visible link text
+                title = link.get("title", "").strip() or link.text_content().strip()
+                cid_match = re.search(r"/(\d+)\.html$", href)
+                chapter_id = cid_match.group(1) if cid_match else ""
+                curr_vol["chapters"].append(
+                    {"title": title, "url": href, "chapterId": chapter_id}
+                )
+        # Append last volume
+        if curr_vol["chapters"]:
+            volumes.append(curr_vol)
+        # Last-update date as printed on the page; fall back to today's date
+        # when the page does not show one.
+        share_text = tree.xpath('string(//div[@id="category-description-share"])')
+        m = re.search(r"最近更新[：:]\s*([\d-]+)", share_text)
+        update_time = m.group(1) if m else datetime.now().strftime("%Y-%m-%d")
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        """
+        Parse a single chapter page and extract its title and text.
+        :param html_list: Raw HTML pages; only the first entry is parsed.
+        :param chapter_id: Identifier of the chapter being parsed.
+        :return: Chapter data, or None if there is no input or no text.
+        """
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        # Title from entry-title
+        title = self._first_str(tree.xpath('//h1[@class="entry-title"]/text()'))
+        # Whole text of entry-content, with non-breaking spaces normalised
+        full_text = tree.xpath('string(//div[@class="entry-content"])')
+        full_text = full_text.replace("\u00A0", " ")
+        # Split into lines, trim each one and drop the empty ones
+        lines = [line.strip() for line in full_text.splitlines() if line.strip()]
+        if not lines:
+            return None
+        content = "\n".join(lines)
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "guidaye"},
+        }

novel_downloader/core/parsers/hetushu.py ADDED Viewed

@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.hetushu
+-------------------------------------
+"""
+import re
+from datetime import datetime
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["hetushu"],
+)
+class HetushuParser(BaseParser):
+    """
+    Parser for hetushu (和图书) book-info and chapter pages.
+    """
+    BASE_URL = "https://www.hetushu.com"
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        """
+        Parse a book info page and extract metadata and chapter structure.
+        :param html_list: Raw HTML pages; only the first entry is parsed.
+        :return: Parsed metadata and volume list, or None if html_list is empty.
+        """
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        # --- Metadata ---
+        book_name = self._first_str(
+            tree.xpath('//div[contains(@class,"book_info")]/h2/text()')
+        )
+        author = self._first_str(
+            tree.xpath(
+                '//div[contains(@class,"book_info")]/div[contains(.,"作者")]/a/text()'
+            )
+        )
+        cover_url = self._first_str(
+            tree.xpath('//div[contains(@class,"book_info")]//img/@src')
+        )
+        # Only prefix the site root onto a non-empty, non-absolute image path;
+        # otherwise a missing cover would turn into the bare site URL and an
+        # absolute src would be mangled.
+        # NOTE(review): assumes _first_str yields "" when nothing matches.
+        if cover_url and not cover_url.startswith(("http://", "https://")):
+            cover_url = self.BASE_URL + cover_url
+        # Completion state is read from the class attribute of the info box:
+        # a class containing "finish" is treated as a completed book.
+        cls_attr = self._first_str(
+            tree.xpath('//div[contains(@class,"book_info")]/@class')
+        )
+        serial_status = "已完结" if "finish" in cls_attr else "连载中"
+        tags = [
+            a.strip()
+            for a in tree.xpath('//dl[@class="tag"]//dd/a/text()')
+            if a.strip()
+        ]
+        paras = tree.xpath('//div[@class="intro"]/p/text()')
+        summary = "\n".join(p.strip() for p in paras if p.strip())
+        # --- Chapter volumes & listings: <dt> opens a volume, <dd> is a chapter ---
+        volumes: list[VolumeInfoDict] = []
+        curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
+        for elem in tree.xpath('//dl[@id="dir"]/*'):
+            if elem.tag == "dt":
+                # Start a new volume (volumes without chapters are dropped)
+                if curr_vol["chapters"]:
+                    volumes.append(curr_vol)
+                curr_vol = {
+                    "volume_name": elem.text_content().strip(),
+                    "chapters": [],
+                }
+            elif elem.tag == "dd":
+                links = elem.xpath(".//a")
+                if not links:
+                    # A <dd> without a link carries no chapter; skip it
+                    # instead of failing on the missing anchor.
+                    continue
+                link = links[0]
+                href = link.get("href", "").strip()
+                # Prefer the title attribute, fall back to the visible link text
+                title = link.get("title", "").strip() or link.text_content().strip()
+                # Extract numeric chapterId from the URL
+                m = re.search(r"/book/\d+/(?P<id>\d+)\.html", href)
+                chapter_id = m.group("id") if m else ""
+                curr_vol["chapters"].append(
+                    {"title": title, "url": href, "chapterId": chapter_id}
+                )
+        # Append the last volume if it has any chapters
+        if curr_vol["chapters"]:
+            volumes.append(curr_vol)
+        # No update time is read from the page; the time of parsing is recorded
+        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "serial_status": serial_status,
+            "tags": tags,
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        """
+        Parse a single chapter page and extract its title and text.
+        :param html_list: Raw HTML pages; only the first entry is parsed.
+        :param chapter_id: Identifier of the chapter being parsed.
+        :return: Chapter data, or None if there is no input or no text.
+        """
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        title = self._first_str(
+            tree.xpath('//div[@id="content"]//h2[@class="h2"]/text()')
+        )
+        # Paragraph text lives in the class-less <div> children of #content
+        paras = tree.xpath('//div[@id="content"]/div[not(@class)]/text()')
+        paragraph_texts = [p.strip() for p in paras if p.strip()]
+        content = "\n".join(paragraph_texts)
+        if not content.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title,
+            "content": content,
+            "extra": {"site": "hetushu"},
+        }

novel_downloader/core/parsers/i25zw.py ADDED Viewed

@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.core.parsers.i25zw
+-----------------------------------
+"""
+from typing import Any
+from lxml import html
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.core.parsers.registry import register_parser
+from novel_downloader.models import (
+    BookInfoDict,
+    ChapterDict,
+    ChapterInfoDict,
+    VolumeInfoDict,
+)
+@register_parser(
+    site_keys=["i25zw"],
+)
+class I25zwParser(BaseParser):
+    """
+    Parser for i25zw (25中文网) book-info, catalog and chapter pages.
+    """
+    def parse_book_info(
+        self,
+        html_list: list[str],
+        **kwargs: Any,
+    ) -> BookInfoDict | None:
+        """
+        Parse the info page and the catalog page of a book.
+        :param html_list: Raw HTML pages: [0] is the info page, [1] the catalog.
+        :return: Parsed metadata and chapter list, or None if fewer than two
+            pages are given or the catalog has no chapter list container.
+        """
+        if len(html_list) < 2:
+            return None
+        info_tree = html.fromstring(html_list[0])
+        catalog_tree = html.fromstring(html_list[1])
+        # Metadata extraction
+        book_name = self._first_str(info_tree.xpath("//h1[@class='f21h']/text()"))
+        author = self._first_str(info_tree.xpath("//h1[@class='f21h']/em/a/text()"))
+        cover_url = self._first_str(info_tree.xpath("//div[@class='pic']/img/@src"))
+        # Tags, status, word count, update time: each value is the text of the
+        # table cell whose <b> label contains the given caption
+        tag = self._first_str(
+            info_tree.xpath("//b[contains(text(),'小说分类')]/parent::td/text()")
+        )
+        serial_status = self._first_str(
+            info_tree.xpath("//b[contains(text(),'小说状态')]/parent::td/text()")
+        )
+        word_count = self._first_str(
+            info_tree.xpath("//b[contains(text(),'全文字数')]/parent::td/text()")
+        )
+        raw_update = self._first_str(
+            info_tree.xpath("//b[contains(text(),'更新时间')]/parent::td/text()")
+        )
+        # Drop the parentheses surrounding the timestamp
+        update_time = raw_update.strip("()")
+        # Summary from styled intro div, minus the leading "关于<book>：" label
+        full_intro = info_tree.xpath("string(//div[@class='intro'][@style])").strip()
+        summary = full_intro.replace(f"关于{book_name}：", "", 1).strip()
+        # Chapter list extraction
+        dl_nodes = catalog_tree.xpath("//div[@id='list']/dl")
+        if not dl_nodes:
+            # Catalog page lacks the expected list container; report the book
+            # as unparseable instead of failing on the missing element.
+            return None
+        dl = dl_nodes[0]
+        # Full-text section dd's
+        dds = dl.xpath("./dd[preceding-sibling::dt[1][contains(., '正文')]]/a")
+        if not dds:
+            # Fallback to second <dt>'s following <dd>
+            dds = dl.xpath("./dt[2]/following-sibling::dd/a")
+        chapters: list[ChapterInfoDict] = []
+        for a in dds:
+            url = a.get("href", "").strip()
+            title = a.text_content().strip()
+            # '/311006/252845677.html' -> '252845677'
+            chapter_id = url.split("/")[-1].split(".")[0]
+            chapters.append(
+                {
+                    "title": title,
+                    "url": url,
+                    "chapterId": chapter_id,
+                }
+            )
+        # All chapters are reported under a single "正文" volume
+        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
+        return {
+            "book_name": book_name,
+            "author": author,
+            "cover_url": cover_url,
+            "update_time": update_time,
+            "word_count": word_count,
+            "serial_status": serial_status,
+            "tags": [tag] if tag else [],
+            "summary": summary,
+            "volumes": volumes,
+            "extra": {},
+        }
+    def parse_chapter(
+        self,
+        html_list: list[str],
+        chapter_id: str,
+        **kwargs: Any,
+    ) -> ChapterDict | None:
+        """
+        Parse a single chapter page and extract its title and text.
+        :param html_list: Raw HTML pages; only the first entry is parsed.
+        :param chapter_id: Identifier of the chapter being parsed.
+        :return: Chapter data, or None if there is no input, no content
+            container, or no text.
+        """
+        if not html_list:
+            return None
+        tree = html.fromstring(html_list[0])
+        title_text = self._first_str(
+            tree.xpath("//div[@class='zhangjieming']/h1/text()")
+        )
+        content_divs = tree.xpath("//div[@id='content']")
+        if not content_divs:
+            return None
+        content_div = content_divs[0]
+        # Only select direct <p> children to avoid nav links
+        paragraphs = []
+        for p in content_div.xpath("./p"):
+            text = p.text_content().strip()
+            if text:
+                paragraphs.append(text)
+        content_text = "\n".join(paragraphs)
+        if not content_text.strip():
+            return None
+        return {
+            "id": chapter_id,
+            "title": title_text,
+            "content": content_text,
+            "extra": {"site": "i25zw"},
+        }

novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl