novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.parsers.sfacg
|
4
|
-
|
3
|
+
novel_downloader.core.parsers.sfacg
|
4
|
+
-----------------------------------
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
@@ -10,18 +10,33 @@ from typing import Any
|
|
10
10
|
from lxml import html
|
11
11
|
|
12
12
|
from novel_downloader.core.parsers.base import BaseParser
|
13
|
-
from novel_downloader.
|
14
|
-
|
15
|
-
|
13
|
+
from novel_downloader.core.parsers.registry import register_parser
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
ChapterInfoDict,
|
18
|
+
VolumeInfoDict,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
@register_parser(
|
23
|
+
site_keys=["sfacg"],
|
24
|
+
)
|
16
25
|
class SfacgParser(BaseParser):
|
17
|
-
"""
|
26
|
+
"""
|
27
|
+
Parser for sfacg book pages.
|
28
|
+
"""
|
18
29
|
|
19
30
|
# Book info XPaths
|
20
31
|
_BOOK_NAME_XPATH = '//ul[@class="book_info"]//span[@class="book_newtitle"]/text()'
|
21
32
|
_AUTHOR_INFO_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/text()'
|
22
33
|
_UPDATE_TIME_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/br/following-sibling::text()' # noqa: E501
|
23
34
|
_COVER_URL_XPATH = '//ul[@class="book_info"]//li/img/@src'
|
24
|
-
_STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
|
35
|
+
# _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
|
36
|
+
_STATUS_XPATH = (
|
37
|
+
'//ul[@class="book_info"]//div[@class="book_info2"]'
|
38
|
+
'/span[contains(., "完结") or contains(., "连载")]/text()'
|
39
|
+
)
|
25
40
|
_SUMMARY_XPATH = '//ul[@class="book_profile"]/li[@class="book_bk_qs1"]/text()'
|
26
41
|
|
27
42
|
# Catalog XPaths
|
@@ -42,54 +57,35 @@ class SfacgParser(BaseParser):
|
|
42
57
|
self,
|
43
58
|
html_list: list[str],
|
44
59
|
**kwargs: Any,
|
45
|
-
) ->
|
46
|
-
"""
|
47
|
-
Parse a book info page and extract metadata and chapter structure.
|
48
|
-
|
49
|
-
:param html_list: Raw HTML of the book info page.
|
50
|
-
:return: Parsed metadata and chapter structure as a dictionary.
|
51
|
-
"""
|
60
|
+
) -> BookInfoDict | None:
|
52
61
|
if len(html_list) < 2:
|
53
|
-
return
|
62
|
+
return None
|
54
63
|
|
55
64
|
info_tree = html.fromstring(html_list[0])
|
56
65
|
catalog_tree = html.fromstring(html_list[1])
|
57
66
|
|
58
|
-
result: dict[str, Any] = {}
|
59
|
-
|
60
67
|
# Book metadata
|
61
|
-
book_name = info_tree.xpath(self._BOOK_NAME_XPATH)
|
62
|
-
result["book_name"] = book_name[0].strip() if book_name else ""
|
68
|
+
book_name = self._first_str(info_tree.xpath(self._BOOK_NAME_XPATH))
|
63
69
|
|
64
|
-
|
65
|
-
|
66
|
-
result["word_count"] = (
|
67
|
-
book_info3[0].split("/")[1].strip()
|
68
|
-
if book_info3 and len(book_info3[0].split("/")) > 1
|
69
|
-
else ""
|
70
|
-
)
|
70
|
+
book_info3_str = self._first_str(info_tree.xpath(self._AUTHOR_INFO_XPATH))
|
71
|
+
author, _, word_count = (p.strip() for p in book_info3_str.partition("/"))
|
71
72
|
|
72
|
-
|
73
|
-
result["update_time"] = book_info3_br[0].strip() if book_info3_br else ""
|
73
|
+
update_time = self._first_str(info_tree.xpath(self._UPDATE_TIME_XPATH))
|
74
74
|
|
75
|
-
cover_url = info_tree.xpath(self._COVER_URL_XPATH)
|
76
|
-
result["cover_url"] = "https:" + cover_url[0] if cover_url else ""
|
75
|
+
cover_url = "https:" + self._first_str(info_tree.xpath(self._COVER_URL_XPATH))
|
77
76
|
|
78
|
-
serial_status = info_tree.xpath(self._STATUS_XPATH)
|
79
|
-
result["serial_status"] = next(
|
80
|
-
(s for s in serial_status if "完结" in s or "连载" in s), ""
|
81
|
-
)
|
77
|
+
serial_status = self._first_str(info_tree.xpath(self._STATUS_XPATH))
|
82
78
|
|
83
|
-
|
84
|
-
|
79
|
+
summary_elem = info_tree.xpath(self._SUMMARY_XPATH)
|
80
|
+
summary = "".join(summary_elem).strip()
|
85
81
|
|
86
82
|
# Chapter structure
|
87
83
|
volume_titles = catalog_tree.xpath(self._VOLUME_TITLE_XPATH)
|
88
84
|
volume_blocks = catalog_tree.xpath(self._VOLUME_CONTENT_XPATH)
|
89
85
|
|
90
|
-
volumes = []
|
86
|
+
volumes: list[VolumeInfoDict] = []
|
91
87
|
for vol_title, vol_block in zip(volume_titles, volume_blocks, strict=False):
|
92
|
-
chapters = []
|
88
|
+
chapters: list[ChapterInfoDict] = []
|
93
89
|
for a in vol_block.xpath(self._CHAPTER_LIST_XPATH):
|
94
90
|
href = a.xpath("./@href")[0] if a.xpath("./@href") else ""
|
95
91
|
title = "".join(a.xpath(".//li//text()")).strip()
|
@@ -107,9 +103,18 @@ class SfacgParser(BaseParser):
|
|
107
103
|
"chapters": chapters,
|
108
104
|
}
|
109
105
|
)
|
110
|
-
result["volumes"] = volumes
|
111
106
|
|
112
|
-
return
|
107
|
+
return {
|
108
|
+
"book_name": book_name,
|
109
|
+
"author": author,
|
110
|
+
"cover_url": cover_url,
|
111
|
+
"update_time": update_time,
|
112
|
+
"word_count": word_count,
|
113
|
+
"serial_status": serial_status,
|
114
|
+
"summary": summary,
|
115
|
+
"volumes": volumes,
|
116
|
+
"extra": {},
|
117
|
+
}
|
113
118
|
|
114
119
|
def parse_chapter(
|
115
120
|
self,
|
@@ -117,13 +122,6 @@ class SfacgParser(BaseParser):
|
|
117
122
|
chapter_id: str,
|
118
123
|
**kwargs: Any,
|
119
124
|
) -> ChapterDict | None:
|
120
|
-
"""
|
121
|
-
Parse a single chapter page and extract clean text or simplified HTML.
|
122
|
-
|
123
|
-
:param html_list: Raw HTML of the chapter page.
|
124
|
-
:param chapter_id: Identifier of the chapter being parsed.
|
125
|
-
:return: Cleaned chapter content as plain text or minimal HTML.
|
126
|
-
"""
|
127
125
|
if not html_list:
|
128
126
|
return None
|
129
127
|
keywords = [
|
@@ -151,7 +149,7 @@ class SfacgParser(BaseParser):
|
|
151
149
|
raw_text_parts = tree.xpath(self._CHAPTER_TEXT_XPATH)
|
152
150
|
content_lines = [txt.strip() for txt in raw_text_parts if txt.strip()]
|
153
151
|
|
154
|
-
content = "\n
|
152
|
+
content = "\n".join(content_lines).strip()
|
155
153
|
if not content:
|
156
154
|
return None
|
157
155
|
|
@@ -0,0 +1,215 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.parsers.shencou
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from lxml import etree, html
|
11
|
+
|
12
|
+
from novel_downloader.core.parsers.base import BaseParser
|
13
|
+
from novel_downloader.core.parsers.registry import register_parser
|
14
|
+
from novel_downloader.models import (
|
15
|
+
BookInfoDict,
|
16
|
+
ChapterDict,
|
17
|
+
VolumeInfoDict,
|
18
|
+
)
|
19
|
+
|
20
|
+
|
21
|
+
@register_parser(
|
22
|
+
site_keys=["shencou"],
|
23
|
+
)
|
24
|
+
class ShencouParser(BaseParser):
|
25
|
+
"""
|
26
|
+
Parser for 神凑轻小说 book pages.
|
27
|
+
"""
|
28
|
+
|
29
|
+
def parse_book_info(
|
30
|
+
self,
|
31
|
+
html_list: list[str],
|
32
|
+
**kwargs: Any,
|
33
|
+
) -> BookInfoDict | None:
|
34
|
+
if len(html_list) < 2:
|
35
|
+
return None
|
36
|
+
|
37
|
+
info_tree = html.fromstring(html_list[0])
|
38
|
+
catalog_tree = html.fromstring(html_list[1])
|
39
|
+
|
40
|
+
# --- Metadata ---
|
41
|
+
raw_name = self._first_str(info_tree.xpath("//span//a/text()"))
|
42
|
+
book_name = raw_name[:-2] if raw_name.endswith("小说") else raw_name
|
43
|
+
|
44
|
+
author = self._first_str(
|
45
|
+
info_tree.xpath('//td[contains(text(),"小说作者")]/text()'),
|
46
|
+
replaces=[("小说作者:", "")],
|
47
|
+
)
|
48
|
+
|
49
|
+
cover_url = self._first_str(
|
50
|
+
info_tree.xpath('//a[contains(@href,"/files/article/image")]/img/@src')
|
51
|
+
)
|
52
|
+
|
53
|
+
# word count
|
54
|
+
word_count = self._first_str(
|
55
|
+
info_tree.xpath('//td[contains(text(),"全文长度")]/text()'),
|
56
|
+
replaces=[("全文长度:", "")],
|
57
|
+
)
|
58
|
+
|
59
|
+
# update time
|
60
|
+
update_time = self._first_str(
|
61
|
+
info_tree.xpath('//td[contains(text(),"最后更新")]/text()'),
|
62
|
+
replaces=[("最后更新:", "")],
|
63
|
+
)
|
64
|
+
|
65
|
+
# serial status
|
66
|
+
serial_status = self._first_str(
|
67
|
+
info_tree.xpath('//td[contains(text(),"写作进度")]/text()'),
|
68
|
+
replaces=[("写作进度:", "")],
|
69
|
+
)
|
70
|
+
|
71
|
+
# summary
|
72
|
+
raw_detail = self._norm_space(
|
73
|
+
info_tree.xpath('string(//td[@width="80%" and @valign="top"])')
|
74
|
+
)
|
75
|
+
summary = ""
|
76
|
+
if "内容简介:" in raw_detail and "本书公告:" in raw_detail:
|
77
|
+
intro = raw_detail.split("内容简介:", 1)[1]
|
78
|
+
summary = intro.split("本书公告:", 1)[0].strip()
|
79
|
+
|
80
|
+
# --- Catalog / Chapters ---
|
81
|
+
volumes: list[VolumeInfoDict] = []
|
82
|
+
curr_vol: VolumeInfoDict = {"volume_name": "未命名卷", "chapters": []}
|
83
|
+
|
84
|
+
# Walk through volume headers (.zjbox) and lists (.zjlist4) in document order
|
85
|
+
for elem in catalog_tree.xpath(
|
86
|
+
'//div[@class="zjbox"] | //div[@class="zjlist4"]'
|
87
|
+
):
|
88
|
+
cls_attr = elem.get("class", "")
|
89
|
+
if "zjbox" in cls_attr:
|
90
|
+
# before starting new volume, save the previous if it has chapters
|
91
|
+
if curr_vol["chapters"]:
|
92
|
+
volumes.append(curr_vol)
|
93
|
+
# start a new volume
|
94
|
+
vol_name = elem.xpath(".//h2/text()")[0].strip()
|
95
|
+
curr_vol = {"volume_name": vol_name, "chapters": []}
|
96
|
+
elif "zjlist4" in cls_attr:
|
97
|
+
# collect all <li><a> entries under this list
|
98
|
+
for a in elem.xpath(".//ol/li/a"):
|
99
|
+
url = a.get("href").strip()
|
100
|
+
title = a.text_content().strip()
|
101
|
+
# '203740.html' -> '203740'
|
102
|
+
chap_id = url.split(".")[0]
|
103
|
+
curr_vol["chapters"].append(
|
104
|
+
{
|
105
|
+
"title": title,
|
106
|
+
"url": url,
|
107
|
+
"chapterId": chap_id,
|
108
|
+
}
|
109
|
+
)
|
110
|
+
|
111
|
+
# append last volume if not empty
|
112
|
+
if curr_vol["chapters"]:
|
113
|
+
volumes.append(curr_vol)
|
114
|
+
|
115
|
+
return {
|
116
|
+
"book_name": book_name,
|
117
|
+
"author": author,
|
118
|
+
"cover_url": cover_url,
|
119
|
+
"update_time": update_time,
|
120
|
+
"summary": summary,
|
121
|
+
"volumes": volumes,
|
122
|
+
"word_count": word_count,
|
123
|
+
"serial_status": serial_status,
|
124
|
+
"extra": {},
|
125
|
+
}
|
126
|
+
|
127
|
+
def parse_chapter(
|
128
|
+
self,
|
129
|
+
html_list: list[str],
|
130
|
+
chapter_id: str,
|
131
|
+
**kwargs: Any,
|
132
|
+
) -> ChapterDict | None:
|
133
|
+
if not html_list:
|
134
|
+
return None
|
135
|
+
|
136
|
+
tree = html.fromstring(html_list[0])
|
137
|
+
title = self._first_str(tree.xpath("//h1/text()"))
|
138
|
+
if not title:
|
139
|
+
return None
|
140
|
+
|
141
|
+
# strip book-name prefix if present
|
142
|
+
bc = tree.xpath('//div[@id="breadCrumb"]//a/text()')
|
143
|
+
if len(bc) >= 2:
|
144
|
+
book_name = bc[1].strip()
|
145
|
+
title = title.removeprefix(book_name).lstrip(" ::–—-").strip()
|
146
|
+
|
147
|
+
anchors = tree.xpath('//div[@id="BookSee_Right"]')
|
148
|
+
if not anchors:
|
149
|
+
return None
|
150
|
+
marker = anchors[0]
|
151
|
+
|
152
|
+
lines: list[str] = []
|
153
|
+
|
154
|
+
def _append_text(text: str) -> None:
|
155
|
+
for ln in text.replace("\xa0", " ").splitlines():
|
156
|
+
ln2 = ln.strip()
|
157
|
+
if ln2:
|
158
|
+
lines.append(ln2)
|
159
|
+
|
160
|
+
if marker.tail:
|
161
|
+
_append_text(marker.tail)
|
162
|
+
|
163
|
+
# 4. Walk through siblings until <!--over-->
|
164
|
+
node = marker
|
165
|
+
while True:
|
166
|
+
sib = node.getnext()
|
167
|
+
if sib is None:
|
168
|
+
break
|
169
|
+
node = sib
|
170
|
+
|
171
|
+
# Stop on the closing comment
|
172
|
+
if isinstance(sib, etree._Comment) and "over" in (sib.text or ""):
|
173
|
+
break
|
174
|
+
|
175
|
+
# Process comment tails (e.g. <!--go--> tail)
|
176
|
+
if isinstance(sib, etree._Comment):
|
177
|
+
if sib.tail:
|
178
|
+
_append_text(sib.tail)
|
179
|
+
continue
|
180
|
+
|
181
|
+
if isinstance(sib, html.HtmlElement):
|
182
|
+
# tag = sib.tag.lower()
|
183
|
+
tag = str(sib.tag).lower()
|
184
|
+
cls = sib.get("class", "") or ""
|
185
|
+
|
186
|
+
if tag == "div" and "divimage" in cls:
|
187
|
+
srcs = sib.xpath(".//img/@src")
|
188
|
+
if srcs:
|
189
|
+
lines.append(f'<img src="{srcs[0]}" />')
|
190
|
+
# text after the div
|
191
|
+
if sib.tail:
|
192
|
+
_append_text(sib.tail)
|
193
|
+
continue
|
194
|
+
|
195
|
+
if tag == "br":
|
196
|
+
if sib.tail:
|
197
|
+
_append_text(sib.tail)
|
198
|
+
continue
|
199
|
+
|
200
|
+
text = sib.text_content()
|
201
|
+
_append_text(text)
|
202
|
+
if sib.tail:
|
203
|
+
_append_text(sib.tail)
|
204
|
+
continue
|
205
|
+
|
206
|
+
content = "\n".join(lines)
|
207
|
+
if not content:
|
208
|
+
return None
|
209
|
+
|
210
|
+
return {
|
211
|
+
"id": chapter_id,
|
212
|
+
"title": title,
|
213
|
+
"content": content,
|
214
|
+
"extra": {"site": "shencou"},
|
215
|
+
}
|
#!/usr/bin/env python3
"""
novel_downloader.core.parsers.shuhaige
--------------------------------------

"""

from typing import Any

from lxml import html

from novel_downloader.core.parsers.base import BaseParser
from novel_downloader.core.parsers.registry import register_parser
from novel_downloader.models import (
    BookInfoDict,
    ChapterDict,
    ChapterInfoDict,
    VolumeInfoDict,
)


@register_parser(
    site_keys=["shuhaige"],
)
class ShuhaigeParser(BaseParser):
    """
    Parser for 书海阁小说网 book pages.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Extract book metadata plus a single "正文" volume from the info page.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        # Basic metadata from the #info panel.
        book_name = self._first_str(doc.xpath('//div[@id="info"]/h1/text()'))
        author = self._first_str(doc.xpath('//div[@id="info"]/p[1]/a/text()'))
        cover_url = self._first_str(doc.xpath('//div[@id="fmimg"]/img/@src'))
        update_time = self._first_str(
            doc.xpath('//div[@id="info"]/p[3]/text()'),
            replaces=[("最后更新:", "")],
        )
        summary = self._first_str(doc.xpath('//div[@id="intro"]/p[1]/text()'))

        # The breadcrumb's second link is the category; use it as the only tag.
        book_type = self._first_str(doc.xpath('//div[@class="con_top"]/a[2]/text()'))
        tags = [book_type] if book_type else []

        # Chapter list: every <dd><a> after the "正文" <dt>.
        chapters: list[ChapterInfoDict] = []
        for link in doc.xpath(
            '//div[@id="list"]/dl/dt[contains(., "正文")]/following-sibling::dd/a'
        ):
            raw_href = link.get("href") or ""
            # e.g. '.../12345.html' -> '12345'
            chapters.append(
                {
                    "title": (link.text or "").strip(),
                    "url": raw_href.strip(),
                    "chapterId": raw_href.rsplit("/", 1)[-1].split(".", 1)[0],
                }
            )

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": tags,
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract title and paragraph text for one chapter page.
        """
        if not html_list:
            return None
        doc = html.fromstring(html_list[0])

        title = self._first_str(doc.xpath('//div[@class="bookname"]/h1/text()'))
        if not title:
            # Fall back to a synthetic title based on the chapter id.
            title = f"第 {chapter_id} 章"

        containers = doc.xpath('//div[@id="content"]')
        if not containers:
            return None

        paragraphs = []
        for p in containers[0].xpath(".//p"):
            paragraphs.append("".join(p.itertext()).strip())
        # Drop the site's self-promotion line if it trails the content.
        if paragraphs and "www.shuhaige.net" in paragraphs[-1]:
            del paragraphs[-1]

        content = "\n".join(paragraphs)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "shuhaige"},
        }
#!/usr/bin/env python3
"""
novel_downloader.core.parsers.tongrenquan
-----------------------------------------

Parser implementation for 同人圈 (tongrenquan) book-info and chapter pages.
"""

from typing import Any

from lxml import html

from novel_downloader.core.parsers.base import BaseParser
from novel_downloader.core.parsers.registry import register_parser
from novel_downloader.models import (
    BookInfoDict,
    ChapterDict,
    ChapterInfoDict,
    VolumeInfoDict,
)


@register_parser(
    site_keys=["tongrenquan"],
)
class TongrenquanParser(BaseParser):
    """
    Parser for 同人圈 book pages.
    """

    # Cover images on this site use relative paths; prefix with the site root.
    BASE_URL = "https://www.tongrenquan.org"

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book page into metadata and a single "正文" volume.

        :param html_list: Raw HTML of the book page (first entry used).
        :return: BookInfoDict or None when no HTML was supplied.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        # Metadata
        book_name = self._first_str(tree.xpath('//div[@class="infos"]/h1/text()'))
        author = self._first_str(
            tree.xpath('//div[@class="date"]/span/text()'),
            replaces=[("作者:", "")],
        )
        cover_url = self.BASE_URL + self._first_str(
            tree.xpath('//div[@class="pic"]//img/@src')
        )
        update_time = self._first_str(
            tree.xpath('//div[@class="date"]/text()'),
            replaces=[("日期:", "")],
        )

        # Summary (collapse text within the <p> tag)
        paras = tree.xpath('//div[@class="infos"]/p//text()')
        summary = "\n".join(p.strip() for p in paras if p.strip())

        # Chapters extraction
        chapters: list[ChapterInfoDict] = []
        for a in tree.xpath('//div[contains(@class,"book_list")]//ul//li/a'):
            url = a.get("href", "").strip()
            title = a.text_content().strip()
            # General pattern: /category/bookId/chapterId.html
            # '/tongren/7562/462.html' -> '462'
            # NOTE: removesuffix, not rstrip(".html") -- rstrip strips a
            # character *set* and would also eat trailing 'h'/'t'/'m'/'l'
            # characters belonging to the id itself.
            chapter_id = url.split("/")[-1].removesuffix(".html")
            chapters.append({"title": title, "url": url, "chapterId": chapter_id})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "tags": ["同人小说"],
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Parse one chapter page into title and plain-text content.

        :param html_list: Raw HTML of the chapter page (first entry used).
        :param chapter_id: Identifier of the chapter being parsed.
        :return: ChapterDict or None when the page yields no content.
        """
        if not html_list:
            return None

        tree = html.fromstring(html_list[0])

        raw_title = self._first_str(
            tree.xpath('//div[contains(@class,"read_chapterName")]//h1/text()')
        )

        book_name = self._first_str(
            tree.xpath('//div[contains(@class,"readTop")]//a[last()]/text()')
        )

        # Chapter headings repeat the book name; drop it from the title.
        title = raw_title.replace(book_name, "").strip()

        # Extract paragraphs of content
        paras = tree.xpath('//div[contains(@class,"read_chapterDetail")]/p')
        texts = [p.text_content().strip() for p in paras if p.text_content().strip()]
        content = "\n".join(texts)
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "tongrenquan"},
        }