novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
novel_downloader/core/searchers/aaatxt.py
@@ -0,0 +1,107 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.searchers.aaatxt
+ --------------------------------------
+
+ """
+
+ import logging
+
+ from lxml import html
+
+ from novel_downloader.core.searchers.base import BaseSearcher
+ from novel_downloader.core.searchers.registry import register_searcher
+ from novel_downloader.models import SearchResult
+
+ logger = logging.getLogger(__name__)
+
+
+ @register_searcher(
+     site_keys=["aaatxt"],
+ )
+ class AaatxtSearcher(BaseSearcher):
+     site_name = "aaatxt"
+     priority = 500
+     SEARCH_URL = "http://www.aaatxt.com/search.php"
+
+     @classmethod
+     async def _fetch_html(cls, keyword: str) -> str:
+         # gbk / gb2312
+         params = {
+             "keyword": cls._quote(keyword, encoding="gb2312", errors="replace"),
+             "submit": cls._quote("搜 索", encoding="gb2312", errors="replace"),
+         }
+         full_url = cls._build_url(cls.SEARCH_URL, params)  # need build manually
+         headers = {
+             "Host": "www.aaatxt.com",
+             "Referer": "http://www.aaatxt.com/",
+         }
+         try:
+             async with (await cls._http_get(full_url, headers=headers)) as resp:
+                 return await cls._response_to_str(resp, "gb2312")
+         except Exception:
+             logger.error(
+                 "Failed to fetch HTML for keyword '%s' from '%s'",
+                 keyword,
+                 cls.SEARCH_URL,
+             )
+             return ""
+
+     @classmethod
+     def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+         doc = html.fromstring(html_str)
+         rows = doc.xpath("//div[@class='sort']//div[@class='list']/table")
+         results: list[SearchResult] = []
+
+         for idx, row in enumerate(rows):
+             href = cls._first_str(row.xpath(".//td[@class='name']/h3/a/@href"))
+             if not href:
+                 continue
+
+             if limit is not None and idx >= limit:
+                 break
+
+             book_id = href.split("/")[-1].split(".")[0]
+             book_url = cls._abs_url(href)
+
+             cover_rel = cls._first_str(row.xpath(".//td[@class='cover']/a/img/@src"))
+             cover_url = cls._abs_url(cover_rel) if cover_rel else ""
+
+             title = cls._first_str(row.xpath(".//td[@class='name']/h3/a//text()"))
+
+             size_text = row.xpath("string(.//td[@class='size'])")
+             size_norm = size_text.replace("\u00a0", " ").replace("\u3000", " ").strip()
+             tokens = [t for t in size_norm.split() if t]
+
+             word_count = "-"
+             author = "-"
+             for tok in tokens:
+                 if tok.startswith("大小:"):
+                     word_count = tok.split(":", 1)[1].strip()
+                 elif tok.startswith("上传:"):
+                     author = tok.split(":", 1)[1].strip()
+
+             intro_text = row.xpath("string(.//td[@class='intro'])")
+             intro_norm = intro_text.replace("\u00a0", " ").replace("\u3000", " ")
+             update_date = "-"
+             for marker in ("更新：", "更新:"):
+                 if marker in intro_norm:
+                     tail = intro_norm.split(marker, 1)[1].strip()
+                     update_date = tail.split()[0] if tail else "-"
+                     break
+
+             results.append(
+                 SearchResult(
+                     site=cls.site_name,
+                     book_id=book_id,
+                     book_url=book_url,
+                     cover_url=cover_url,
+                     title=title,
+                     author=author,
+                     latest_chapter="-",
+                     update_date=update_date,
+                     word_count=word_count,
+                     priority=cls.priority + idx,
+                 )
+             )
+         return results
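
Note on the GB2312 handling above: `_quote(..., encoding="gb2312", errors="replace")` percent-escapes the GB2312 bytes up front, and `_build_url` concatenates the query string verbatim; handing the dict to the HTTP client's `params=` instead would re-encode the `%` signs and corrupt the query. A minimal stdlib-only sketch of the same idea (the keyword is an arbitrary example, not from the package):

    from urllib.parse import quote_plus

    # Pre-encode the query the way AaatxtSearcher._fetch_html does:
    # each GB2312 byte becomes a %XX escape, spaces become '+'.
    keyword = quote_plus("三体", encoding="gb2312", errors="replace")
    submit = quote_plus("搜 索", encoding="gb2312", errors="replace")

    # Manual assembly (as in BaseSearcher._build_url) keeps the
    # escapes intact instead of letting the client re-quote them.
    url = f"http://www.aaatxt.com/search.php?keyword={keyword}&submit={submit}"
    print(url)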
novel_downloader/core/searchers/{biquge.py → b520.py}
@@ -1,7 +1,7 @@
  #!/usr/bin/env python3
  """
- novel_downloader.core.searchers.biquge
- --------------------------------------
+ novel_downloader.core.searchers.b520
+ ------------------------------------

  """

@@ -17,57 +17,53 @@ logger = logging.getLogger(__name__)


  @register_searcher(
-     site_keys=["biquge", "bqg"],
+     site_keys=["biquge", "bqg", "b520"],
  )
  class BiqugeSearcher(BaseSearcher):
      site_name = "biquge"
-     priority = 5
+     priority = 30
+     BASE_URL = "http://www.b520.cc/"
      SEARCH_URL = "http://www.b520.cc/modules/article/search.php"

      @classmethod
-     def _fetch_html(cls, keyword: str) -> str:
-         """
-         Fetch raw HTML from Biquge's search page.
-
-         :param keyword: The search term to query on Biquge.
-         :return: HTML text of the search results page, or an empty string on fail.
-         """
+     async def _fetch_html(cls, keyword: str) -> str:
          params = {"searchkey": keyword}
          try:
-             response = cls._http_get(cls.SEARCH_URL, params=params)
-             return response.text
+             async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
+                 return await cls._response_to_str(resp)
          except Exception:
              logger.error(
                  "Failed to fetch HTML for keyword '%s' from '%s'",
                  keyword,
                  cls.SEARCH_URL,
-                 exc_info=True,
              )
              return ""

      @classmethod
      def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
-         """
-         Parse raw HTML from Biquge search results into list of SearchResult.
-
-         :param html_str: Raw HTML string from Biquge search results page.
-         :param limit: Maximum number of results to return, or None for all.
-         :return: List of SearchResult dicts.
-         """
          doc = html.fromstring(html_str)
          rows = doc.xpath('//table[@class="grid"]//tr[position()>1]')
          results: list[SearchResult] = []

          for idx, row in enumerate(rows):
+             href = cls._first_str(row.xpath(".//td[1]/a[1]/@href"))
+             if not href:
+                 continue
+
              if limit is not None and idx >= limit:
                  break
-             # Title and book_id
-             title_elem = row.xpath(".//td[1]/a")[0]
-             title = title_elem.text_content().strip()
-             href = title_elem.get("href", "").strip("/")
-             book_id = href.split("/")[0] if href else ""
-             # Author
-             author = row.xpath(".//td[3]")[0].text_content().strip()
+
+             book_id = href.strip("/").split("/")[-1]
+             book_url = cls._abs_url(href)
+
+             title = cls._first_str(row.xpath(".//td[1]/a[1]/text()"))
+
+             latest_chapter = cls._first_str(row.xpath(".//td[2]/a[1]/text()")) or "-"
+
+             author = cls._first_str(row.xpath(".//td[3]//text()"))
+             word_count = cls._first_str(row.xpath(".//td[4]//text()"))
+             update_date = cls._first_str(row.xpath(".//td[5]//text()"))
+
              # Compute priority
              prio = cls.priority + idx

@@ -75,8 +71,13 @@ class BiqugeSearcher(BaseSearcher):
              SearchResult(
                  site=cls.site_name,
                  book_id=book_id,
+                 book_url=book_url,
+                 cover_url="",
                  title=title,
                  author=author,
+                 latest_chapter=latest_chapter,
+                 update_date=update_date,
+                 word_count=word_count,
                  priority=prio,
              )
          )
novel_downloader/core/searchers/base.py
@@ -3,13 +3,14 @@
  novel_downloader.core.searchers.base
  ------------------------------------

+ Abstract base class providing common utilities for site-specific searchers.
  """

  import abc
- from typing import Any
- from urllib.parse import quote_plus
+ from typing import Any, ClassVar
+ from urllib.parse import quote_plus, urljoin

- import requests
+ import aiohttp

  from novel_downloader.core.interfaces import SearcherProtocol
  from novel_downloader.models import SearchResult
@@ -18,75 +19,150 @@ from novel_downloader.utils.constants import DEFAULT_USER_HEADERS

  class BaseSearcher(abc.ABC, SearcherProtocol):
      site_name: str
-     _session = requests.Session()
-     _DEFAULT_TIMEOUT: tuple[int, int] = (5, 10)
+     BASE_URL: str = ""
+     _session: ClassVar[aiohttp.ClientSession | None] = None

      @classmethod
-     def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
-         html = cls._fetch_html(keyword)
+     def configure(cls, session: aiohttp.ClientSession) -> None:
+         cls._session = session
+
+     @classmethod
+     async def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
+         html = await cls._fetch_html(keyword)
          return cls._parse_html(html, limit)

      @classmethod
      @abc.abstractmethod
-     def _fetch_html(cls, keyword: str) -> str:
-         """Get raw HTML from search API or page"""
+     async def _fetch_html(cls, keyword: str) -> str:
+         """
+         Fetch raw HTML from search API or page
+
+         :param keyword: The search term to query.
+         :return: HTML text of the search results page, or an empty string on fail.
+         """
          pass

      @classmethod
      @abc.abstractmethod
      def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
-         """Parse HTML into standard search result list"""
+         """
+         Parse raw HTML from search API or page into list of SearchResult.
+
+         :param html_str: Raw HTML string from search results page.
+         :param limit: Maximum number of results to return, or None for all.
+         :return: List of SearchResult dicts.
+         """
          pass

      @classmethod
-     def _http_get(
+     async def _http_get(
          cls,
          url: str,
          *,
          params: dict[str, str] | None = None,
          headers: dict[str, str] | None = None,
-         timeout: tuple[int, int] | None = None,
          **kwargs: Any,
-     ) -> requests.Response:
+     ) -> aiohttp.ClientResponse:
          """
-         Helper for GET requests with default headers, timeout, and error-raising.
+         Helper for GET requests with default headers.
          """
+         session = cls._ensure_session()
          hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
-         resp = cls._session.get(
-             url,
-             params=params,
-             headers=hdrs,
-             timeout=timeout or cls._DEFAULT_TIMEOUT,
-             **kwargs,
-         )
-         resp.raise_for_status()
+         resp = await session.get(url, params=params, headers=hdrs, **kwargs)
+         try:
+             resp.raise_for_status()
+         except aiohttp.ClientResponseError:
+             try:
+                 await resp.read()
+             finally:
+                 resp.release()
+             raise
          return resp

      @classmethod
-     def _http_post(
+     async def _http_post(
          cls,
          url: str,
          *,
          data: dict[str, str] | str | None = None,
          headers: dict[str, str] | None = None,
-         timeout: tuple[int, int] | None = None,
          **kwargs: Any,
-     ) -> requests.Response:
+     ) -> aiohttp.ClientResponse:
          """
-         Helper for POST requests with default headers, timeout, and error-raising.
+         Helper for POST requests with default headers.
          """
+         session = cls._ensure_session()
          hdrs = {**DEFAULT_USER_HEADERS, **(headers or {})}
-         resp = cls._session.post(
-             url,
-             data=data,
-             headers=hdrs,
-             timeout=timeout or cls._DEFAULT_TIMEOUT,
-             **kwargs,
-         )
-         resp.raise_for_status()
+         resp = await session.post(url, data=data, headers=hdrs, **kwargs)
+         try:
+             resp.raise_for_status()
+         except aiohttp.ClientResponseError:
+             try:
+                 await resp.read()
+             finally:
+                 resp.release()
+             raise
          return resp

+     @classmethod
+     def _ensure_session(cls) -> aiohttp.ClientSession:
+         if cls._session is None:
+             raise RuntimeError(
+                 f"{cls.__name__} has no aiohttp session. "
+                 "Call .configure(session) first."
+             )
+         return cls._session
+
      @staticmethod
-     def _quote(q: str) -> str:
+     def _quote(q: str, encoding: str | None = None, errors: str | None = None) -> str:
          """URL-encode a query string safely."""
-         return quote_plus(q)
+         return quote_plus(q, encoding=encoding, errors=errors)
+
+     @staticmethod
+     async def _response_to_str(
+         resp: aiohttp.ClientResponse,
+         encoding: str | None = None,
+     ) -> str:
+         """
+         Read the full body of resp as text. First try the declared charset,
+         then on UnicodeDecodeError fall back to a lenient utf-8 decode.
+         """
+         data: bytes = await resp.read()
+         encodings = [
+             encoding,
+             resp.charset,
+             "gb2312",
+             "gb18030",
+             "gbk",
+             "utf-8",
+         ]
+         encodings_list: list[str] = [e for e in encodings if e]
+         for enc in encodings_list:
+             try:
+                 return data.decode(enc)
+             except UnicodeDecodeError:
+                 continue
+         encoding = encoding or "utf-8"
+         return data.decode(encoding, errors="ignore")
+
+     @staticmethod
+     def _first_str(xs: list[str], replaces: list[tuple[str, str]] | None = None) -> str:
+         replaces = replaces or []
+         value: str = xs[0].strip() if xs else ""
+         for replace in replaces:
+             old, new = replace
+             value = value.replace(old, new)
+         return value
+
+     @staticmethod
+     def _build_url(base: str, params: dict[str, str]) -> str:
+         query_string = "&".join(f"{k}={v}" for k, v in params.items())
+         return f"{base}?{query_string}"
+
+     @classmethod
+     def _abs_url(cls, url: str) -> str:
+         return (
+             url
+             if url.startswith(("http://", "https://"))
+             else urljoin(cls.BASE_URL, url)
+         )
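
The base-class change above replaces the shared `requests.Session` with an injected `aiohttp.ClientSession`: callers must run `configure(session)` once before awaiting `search()`, otherwise `_ensure_session()` raises `RuntimeError`. A minimal usage sketch (calling a searcher class directly for illustration; inside the package the registry in `novel_downloader.core.searchers.registry` presumably handles this wiring, and the keyword is an arbitrary example):

    import asyncio

    import aiohttp

    from novel_downloader.core.searchers.dxmwx import DxmwxSearcher


    async def main() -> None:
        async with aiohttp.ClientSession() as session:
            # Inject the shared session before the first query.
            DxmwxSearcher.configure(session)
            results = await DxmwxSearcher.search("诡秘之主", limit=5)
            for r in results:
                # SearchResult is a dict-style record per the docstrings above.
                print(r["priority"], r["title"], r["book_url"])


    asyncio.run(main())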
novel_downloader/core/searchers/dxmwx.py
@@ -0,0 +1,105 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.searchers.dxmwx
+ -------------------------------------
+
+ """
+
+ import logging
+
+ from lxml import html
+
+ from novel_downloader.core.searchers.base import BaseSearcher
+ from novel_downloader.core.searchers.registry import register_searcher
+ from novel_downloader.models import SearchResult
+
+ logger = logging.getLogger(__name__)
+
+
+ @register_searcher(
+     site_keys=["dxmwx"],
+ )
+ class DxmwxSearcher(BaseSearcher):
+     site_name = "dxmwx"
+     priority = 30
+     BASE_URL = "https://www.dxmwx.org"
+     SEARCH_URL = "https://www.dxmwx.org/list/{query}.html"
+
+     @classmethod
+     async def _fetch_html(cls, keyword: str) -> str:
+         url = cls.SEARCH_URL.format(query=cls._quote(keyword))
+         try:
+             async with (await cls._http_get(url)) as resp:
+                 return await cls._response_to_str(resp)
+         except Exception:
+             logger.error(
+                 "Failed to fetch HTML for keyword '%s' from '%s'",
+                 keyword,
+                 cls.SEARCH_URL,
+             )
+             return ""
+
+     @classmethod
+     def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+         doc = html.fromstring(html_str)
+         rows = doc.xpath(
+             "//div[@id='ListContents']/div[contains(@style,'position: relative')]"
+         )
+         results: list[SearchResult] = []
+
+         for idx, row in enumerate(rows):
+             href = cls._first_str(
+                 row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/@href")
+             )
+             if not href:
+                 continue
+
+             if limit is not None and idx >= limit:
+                 break
+
+             book_url = cls._abs_url(href)
+             # "/book/10409.html" -> "10409"
+             book_id = href.split("/")[-1].split(".", 1)[0]
+
+             title = cls._first_str(
+                 row.xpath(".//div[contains(@class,'margin0h5')]//a[1]/text()")
+             )
+
+             author = cls._first_str(
+                 row.xpath(".//div[contains(@class,'margin0h5')]//a[2]/text()")
+             )
+
+             cover_src = cls._first_str(
+                 row.xpath(".//div[contains(@class,'imgwidth')]//img/@src")
+             )
+             cover_url = cls._abs_url(cover_src) if cover_src else ""
+
+             latest_chapter = cls._first_str(
+                 row.xpath(
+                     ".//a[span and span[contains(normalize-space(.),'最新章节')]]"
+                     "/span/following-sibling::text()[1]"
+                 )
+             )
+
+             update_date = cls._first_str(
+                 row.xpath(".//span[contains(@class,'lefth5')]/text()")
+             )
+
+             # Compute priority
+             prio = cls.priority + idx
+
+             results.append(
+                 SearchResult(
+                     site=cls.site_name,
+                     book_id=book_id,
+                     book_url=book_url,
+                     cover_url=cover_url,
+                     title=title,
+                     author=author,
+                     latest_chapter=latest_chapter,
+                     update_date=update_date,
+                     word_count="-",
+                     priority=prio,
+                 )
+             )
+         return results
novel_downloader/core/searchers/eightnovel.py
@@ -0,0 +1,84 @@
+ #!/usr/bin/env python3
+ """
+ novel_downloader.core.searchers.eightnovel
+ ------------------------------------------
+
+ """
+
+ import logging
+
+ from lxml import html
+
+ from novel_downloader.core.searchers.base import BaseSearcher
+ from novel_downloader.core.searchers.registry import register_searcher
+ from novel_downloader.models import SearchResult
+
+ logger = logging.getLogger(__name__)
+
+
+ @register_searcher(
+     site_keys=["eightnovel", "8novel"],
+ )
+ class EightnovelSearcher(BaseSearcher):
+     site_name = "8novel"
+     priority = 20
+     BASE_URL = "https://www.8novel.com"
+     SEARCH_URL = "https://www.8novel.com/search/"
+
+     @classmethod
+     async def _fetch_html(cls, keyword: str) -> str:
+         params = {"key": keyword}
+         try:
+             async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
+                 return await cls._response_to_str(resp)
+         except Exception:
+             logger.error(
+                 "Failed to fetch HTML for keyword '%s' from '%s'",
+                 keyword,
+                 cls.SEARCH_URL,
+             )
+             return ""
+
+     @classmethod
+     def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
+         doc = html.fromstring(html_str)
+         anchors = doc.xpath("//div[contains(@class,'picsize')]/a")
+         results: list[SearchResult] = []
+
+         for idx, a in enumerate(anchors):
+             href = cls._first_str(a.xpath("./@href"))
+             if not href:
+                 continue
+
+             if limit is not None and idx >= limit:
+                 break
+
+             # '/novelbooks/6045' -> "6045"
+             book_id = href.rstrip("/").split("/")[-1]
+             book_url = cls._abs_url(href)
+
+             cover_rel = cls._first_str(a.xpath(".//img/@src"))
+             cover_url = cls._abs_url(cover_rel) if cover_rel else ""
+
+             title = cls._first_str(a.xpath("./@title"))
+
+             word_count = cls._first_str(a.xpath(".//eps//text()")) or "-"
+
+             # Compute priority
+             prio = cls.priority + idx
+
+             results.append(
+                 SearchResult(
+                     site=cls.site_name,
+                     book_id=book_id,
+                     book_url=book_url,
+                     cover_url=cover_url,
+                     title=title,
+                     author="-",
+                     latest_chapter="-",
+                     update_date="-",
+                     word_count=word_count,
+                     priority=prio,
+                 )
+             )
+         return results
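
Across these searchers each result's `priority` is `cls.priority + idx` (8novel at 20, b520 and dxmwx at 30, aaatxt at 500), so a lower value marks a preferred site and an earlier row. Assuming an ascending sort is the intended aggregation, results from several sites could be merged as sketched below (`merge_results` is a hypothetical helper, not part of the package):

    from novel_downloader.models import SearchResult


    def merge_results(
        *per_site: list[SearchResult], limit: int = 20
    ) -> list[SearchResult]:
        # Flatten the per-site result lists, then sort ascending so
        # preferred sites (low class priority) and earlier rows win.
        merged = [r for results in per_site for r in results]
        merged.sort(key=lambda r: r["priority"])
        return merged[:limit]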