novel-downloader 1.2.0-py3-none-any.whl → 1.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +2 -0
- novel_downloader/config/adapter.py +41 -13
- novel_downloader/config/models.py +13 -8
- novel_downloader/core/downloaders/base_async_downloader.py +1 -1
- novel_downloader/core/downloaders/common_downloader.py +1 -2
- novel_downloader/core/downloaders/qidian_downloader.py +1 -2
- novel_downloader/core/factory/downloader_factory.py +13 -11
- novel_downloader/core/interfaces/async_requester_protocol.py +9 -4
- novel_downloader/core/interfaces/requester_protocol.py +7 -4
- novel_downloader/core/parsers/base_parser.py +3 -3
- novel_downloader/core/parsers/common_parser/helper.py +7 -5
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +1 -1
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +5 -3
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +1 -1
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +5 -3
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +74 -18
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +2 -2
- novel_downloader/core/requesters/base_async_session.py +11 -6
- novel_downloader/core/requesters/base_browser.py +12 -8
- novel_downloader/core/requesters/base_session.py +9 -6
- novel_downloader/core/requesters/common_requester/common_async_session.py +4 -2
- novel_downloader/core/requesters/common_requester/common_session.py +4 -4
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +38 -19
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +6 -6
- novel_downloader/core/savers/common_saver/common_epub.py +1 -1
- novel_downloader/locales/en.json +4 -0
- novel_downloader/locales/zh.json +4 -0
- novel_downloader/resources/config/settings.yaml +16 -13
- novel_downloader/utils/constants.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +6 -0
- novel_downloader/utils/time_utils/datetime_utils.py +1 -1
- novel_downloader/utils/time_utils/sleep_utils.py +27 -11
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/METADATA +1 -1
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/RECORD +39 -39
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/WHEEL +1 -1
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.2.0.dist-info → novel_downloader-1.2.2.dist-info}/top_level.txt +0 -0
novel_downloader/__init__.py
CHANGED
novel_downloader/cli/download.py
CHANGED
@@ -57,6 +57,8 @@ def download_cli(ctx: Context, book_ids: List[str], site: str) -> None:
     parser_cfg = adapter.get_parser_config()
     saver_cfg = adapter.get_saver_config()
 
+    click.echo(t("download_site_mode", mode=downloader_cfg.mode))
+
     # If no book_ids provided on the command line, try to load them from config
     if not book_ids:
         try:

novel_downloader/config/adapter.py
CHANGED
@@ -23,6 +23,7 @@ from .models import (
     RequesterConfig,
     SaverConfig,
 )
+from .site_rules import load_site_rules
 
 
 class ConfigAdapter:
@@ -38,19 +39,43 @@ class ConfigAdapter:
         self._config = config
         self._site = site
 
+        site_rules = load_site_rules()  # -> Dict[str, SiteRules]
+        self._supported_sites = set(site_rules.keys())
+
     def set_site(self, site: str) -> None:
         """
         切换当前适配的站点
         """
         self._site = site
 
+    def _get_site_cfg(self) -> Dict[str, Any]:
+        """
+        统一获取站点配置:
+
+        1. 先尝试从 self._config["sites"][self._site] 取配置
+        2. 如果没有配置, 且 self._site 在 self._supported_sites 中, 则取 sites["common"]
+        3. 否则返回空 dict
+        """
+        sites_cfg = self._config.get("sites", {}) or {}
+
+        # 1. site-specific config
+        if self._site in sites_cfg:
+            return sites_cfg[self._site] or {}
+
+        # 2. fallback to "common" only if site is supported
+        if self._site in self._supported_sites:
+            return sites_cfg.get("common", {}) or {}
+
+        # 3. completely unsupported site
+        return {}
+
     def get_requester_config(self) -> RequesterConfig:
         """
         从 config["requests"] 中读取通用请求配置 (含 DrissionPage 设置)
         返回 RequesterConfig 实例
         """
         req = self._config.get("requests", {})
-        site_cfg = self.
+        site_cfg = self._get_site_cfg()
         return RequesterConfig(
             wait_time=req.get("wait_time", 5),
             retry_times=req.get("retry_times", 3),
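
The new `_get_site_cfg` fallback can be exercised roughly as follows. This is a minimal sketch: the `ConfigAdapter(config, site)` constructor shape is inferred from the hunk above, and the book id is a placeholder.

    # Sketch: a site-specific entry wins; sites["common"] is used only for
    # sites that appear in the loaded site rules; anything else gets {}.
    config = {
        "sites": {
            "common": {"mode": "session"},
            "qidian": {"mode": "browser", "book_ids": ["<book_id>"]},
        }
    }
    adapter = ConfigAdapter(config, "qidian")
    adapter._get_site_cfg()  # -> {"mode": "browser", "book_ids": ["<book_id>"]}
    adapter.set_site("some_unsupported_site")
    adapter._get_site_cfg()  # -> {} (no sites entry, not in site rules)
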
@@ -73,7 +98,7 @@ class ConfigAdapter:
         """
         gen = self._config.get("general", {})
         debug = gen.get("debug", {})
-        site_cfg = self.
+        site_cfg = self._get_site_cfg()
         return DownloaderConfig(
             request_interval=gen.get("request_interval", 5),
             raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
@@ -93,18 +118,21 @@ class ConfigAdapter:
         config["sites"][site] 中读取解析器相关配置, 返回 ParserConfig 实例
         """
         gen = self._config.get("general", {})
-
+        font_ocr = gen.get("font_ocr", {})
+        site_cfg = self._get_site_cfg()
         return ParserConfig(
             cache_dir=gen.get("cache_dir", "./cache"),
-            decode_font=
-            use_freq=
-            use_ocr=
-            use_vec=
-            ocr_version=
-            save_font_debug=
-            batch_size=
-
-
+            decode_font=font_ocr.get("decode_font", False),
+            use_freq=font_ocr.get("use_freq", False),
+            use_ocr=font_ocr.get("use_ocr", True),
+            use_vec=font_ocr.get("use_vec", False),
+            ocr_version=font_ocr.get("ocr_version", "v1.0"),
+            save_font_debug=font_ocr.get("save_font_debug", False),
+            batch_size=font_ocr.get("batch_size", 32),
+            gpu_mem=font_ocr.get("gpu_mem", 500),
+            gpu_id=font_ocr.get("gpu_id", None),
+            ocr_weight=font_ocr.get("ocr_weight", 0.6),
+            vec_weight=font_ocr.get("vec_weight", 0.4),
             mode=site_cfg.get("mode", "session"),
         )
 
@@ -136,7 +164,7 @@ class ConfigAdapter:
         """
         从 config["sites"][site]["book_ids"] 中提取目标书籍列表
         """
-        site_cfg = self.
+        site_cfg = self._get_site_cfg()
        raw_ids = site_cfg.get("book_ids", [])
 
         if isinstance(raw_ids, str):
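
The continuation of this hunk is not shown, but the `isinstance(raw_ids, str)` guard implies `book_ids` may be given either as a single string or as a list in the YAML. A normalization along these lines is the likely shape (sketch, not the package's verbatim code):

    raw_ids = site_cfg.get("book_ids", [])
    if isinstance(raw_ids, str):
        book_ids = [raw_ids]  # one id given as a bare string
    elif isinstance(raw_ids, list):
        book_ids = [str(b) for b in raw_ids]
    else:
        book_ids = []
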

novel_downloader/config/models.py
CHANGED
@@ -24,10 +24,10 @@ from typing import Any, Dict, List, Literal, Optional, TypedDict
 # === Requesters ===
 @dataclass
 class RequesterConfig:
-    wait_time:
+    wait_time: float = 5.0
     retry_times: int = 3
-    retry_interval:
-    timeout:
+    retry_interval: float = 5.0
+    timeout: float = 30.0
     headless: bool = True
     user_data_folder: str = ""
     profile_name: str = ""
@@ -41,7 +41,7 @@ class RequesterConfig:
 # === Downloaders ===
 @dataclass
 class DownloaderConfig:
-    request_interval:
+    request_interval: float = 5.0
     raw_data_dir: str = "./raw_data"
     cache_dir: str = "./novel_cache"
     download_workers: int = 4
@@ -63,6 +63,8 @@ class ParserConfig:
     use_vec: bool = False
     ocr_version: str = "v1.0"
     batch_size: int = 32
+    gpu_mem: int = 500
+    gpu_id: Optional[int] = None
     ocr_weight: float = 0.6
     vec_weight: float = 0.4
     save_font_debug: bool = False
@@ -133,16 +135,19 @@ class ChapterFieldRules(TypedDict):
     steps: List[RuleStep]
 
 
-class
-    has_volume: bool  # 是否存在卷,false=未分卷
+class VolumesRulesOptional(TypedDict, total=False):
     volume_selector: str  # 有卷时选择 volume 块的 selector
-    chapter_selector: str  # 选择 chapter 节点的 selector
     volume_name_steps: List[RuleStep]
-    chapter_steps: List[ChapterFieldRules]  # 提取章节信息的步骤列表
     volume_mode: str  # Optional: "normal" (default) or "mixed"
     list_selector: str  # Optional: If "mixed" mode, parent container selector
 
 
+class VolumesRules(VolumesRulesOptional):
+    has_volume: bool  # 是否存在卷,false=未分卷
+    chapter_selector: str  # 选择 chapter 节点的 selector
+    chapter_steps: List[ChapterFieldRules]  # 提取章节信息的步骤列表
+
+
 class BookInfoRules(TypedDict, total=False):
     book_name: FieldRules
     author: FieldRules
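
The two-class split above is the standard TypedDict idiom for mixing required and optional keys: keys declared on a `total=False` base stay optional, while keys declared on the subclass (where totality defaults to True) are required. A self-contained illustration with simplified fields:

    from typing import TypedDict

    class _Optional(TypedDict, total=False):
        volume_selector: str       # may be omitted

    class _Rules(_Optional):
        chapter_selector: str      # required

    ok: _Rules = {"chapter_selector": "li"}       # type-checks: optional key omitted
    # bad: _Rules = {"volume_selector": "div"}    # error: missing chapter_selector
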

novel_downloader/core/downloaders/base_async_downloader.py
CHANGED
@@ -94,7 +94,7 @@ class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
         return self._config.login_required
 
     @property
-    def request_interval(self) ->
+    def request_interval(self) -> float:
         return self._config.request_interval
 
     async def prepare(self) -> None:

novel_downloader/core/downloaders/common_downloader.py
CHANGED
@@ -67,8 +67,7 @@ class CommonDownloader(BaseDownloader):
         cache_base = self.cache_dir / site / book_id
         info_path = raw_base / "book_info.json"
         chapter_dir = raw_base / "chapters"
-
-        chapters_html_dir = cache_base / "html"
+        chapters_html_dir = cache_base / "html"
 
         raw_base.mkdir(parents=True, exist_ok=True)
         chapter_dir.mkdir(parents=True, exist_ok=True)

novel_downloader/core/downloaders/qidian_downloader.py
CHANGED
@@ -87,8 +87,7 @@ class QidianDownloader(BaseDownloader):
         info_path = raw_base / "book_info.json"
         chapter_dir = raw_base / "chapters"
         encrypted_chapter_dir = raw_base / "encrypted_chapters"
-
-        chapters_html_dir = cache_base / "html"
+        chapters_html_dir = cache_base / "html"
 
         raw_base.mkdir(parents=True, exist_ok=True)
         chapter_dir.mkdir(parents=True, exist_ok=True)

novel_downloader/core/factory/downloader_factory.py
CHANGED
@@ -14,7 +14,7 @@ based on the site name and parser mode specified in the configuration.
 To add support for new sites or modes, extend the `_site_map` accordingly.
 """
 
-from typing import Union
+from typing import Union, cast
 
 from novel_downloader.config import DownloaderConfig, load_site_rules
 from novel_downloader.core.downloaders import (
@@ -137,13 +137,15 @@ def get_downloader(
     :raises TypeError: If the provided requester does not match the required protocol
                        for the chosen mode (sync vs async).
     """
-
-
-
-
-        return get_async_downloader(
-
-    if not
-        raise TypeError(
-
-
+    if requester.is_async():
+        if config.mode.lower() != "async":
+            raise TypeError("Requester is async, but config.mode is not 'async'")
+        async_requester = cast(AsyncRequesterProtocol, requester)
+        return get_async_downloader(async_requester, parser, saver, site, config)
+    else:
+        if config.mode.lower() not in ("browser", "session"):
+            raise TypeError(
+                "Requester is sync, but config.mode is not 'browser' or 'session'"
+            )
+        sync_requester = cast(RequesterProtocol, requester)
+        return get_sync_downloader(sync_requester, parser, saver, site, config)
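
The dispatch leans on `is_async()` returning `Literal[True]`/`Literal[False]` plus an explicit `cast`: calling a method that returns a literal does not narrow a union-typed `requester` for the type checker, hence the casts in the hunk above. A toy version of the same shape (stub classes, not the real factory):

    from typing import Literal, Union

    class AsyncReq:
        def is_async(self) -> Literal[True]:
            return True

    class SyncReq:
        def is_async(self) -> Literal[False]:
            return False

    def dispatch(requester: Union[AsyncReq, SyncReq], mode: str) -> str:
        if requester.is_async():
            if mode.lower() != "async":
                raise TypeError("Requester is async, but config.mode is not 'async'")
            return "async downloader"
        if mode.lower() not in ("browser", "session"):
            raise TypeError("Requester is sync, but config.mode is not 'browser' or 'session'")
        return "sync downloader"

    assert dispatch(AsyncReq(), "async") == "async downloader"
    assert dispatch(SyncReq(), "session") == "sync downloader"
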

novel_downloader/core/interfaces/async_requester_protocol.py
CHANGED
@@ -9,7 +9,7 @@ for book info pages, individual chapters, managing request lifecycle,
 and optionally retrieving a user's authenticated bookcase — all in async style.
 """
 
-from typing import Optional, Protocol, runtime_checkable
+from typing import Literal, Optional, Protocol, runtime_checkable
 
 
 @runtime_checkable
@@ -21,6 +21,9 @@ class AsyncRequesterProtocol(Protocol):
     and manage login/shutdown asynchronously.
     """
 
+    def is_async(self) -> Literal[True]:
+        ...
+
     async def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
         """
         Attempt to log in asynchronously.
@@ -28,7 +31,9 @@ class AsyncRequesterProtocol(Protocol):
         """
         ...
 
-    async def get_book_info(
+    async def get_book_info(
+        self, book_id: str, wait_time: Optional[float] = None
+    ) -> str:
         """
         Fetch the raw HTML (or JSON) of the book info page asynchronously.
 
@@ -39,7 +44,7 @@ class AsyncRequesterProtocol(Protocol):
         ...
 
     async def get_book_chapter(
-        self, book_id: str, chapter_id: str, wait_time: Optional[
+        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
     ) -> str:
         """
         Fetch the raw HTML (or JSON) of a single chapter asynchronously.
@@ -51,7 +56,7 @@ class AsyncRequesterProtocol(Protocol):
         """
         ...
 
-    async def get_bookcase(self, wait_time: Optional[
+    async def get_bookcase(self, wait_time: Optional[float] = None) -> str:
         """
         Optional: Retrieve the HTML content of the authenticated
         user's bookcase page asynchronously.

novel_downloader/core/interfaces/requester_protocol.py
CHANGED
@@ -9,7 +9,7 @@ for book info pages, individual chapters, managing request lifecycle,
 and optionally retrieving a user's authenticated bookcase.
 """
 
-from typing import Optional, Protocol, runtime_checkable
+from typing import Literal, Optional, Protocol, runtime_checkable
 
 
 @runtime_checkable
@@ -20,13 +20,16 @@ class RequesterProtocol(Protocol):
     - a specific chapter page.
     """
 
+    def is_async(self) -> Literal[False]:
+        ...
+
     def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
         """
         Attempt to log in
         """
         ...
 
-    def get_book_info(self, book_id: str, wait_time: Optional[
+    def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
         """
         Fetch the raw HTML (or JSON) of the book info page.
 
@@ -37,7 +40,7 @@ class RequesterProtocol(Protocol):
         ...
 
     def get_book_chapter(
-        self, book_id: str, chapter_id: str, wait_time: Optional[
+        self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
     ) -> str:
         """
         Fetch the raw HTML (or JSON) of a single chapter.
@@ -55,7 +58,7 @@ class RequesterProtocol(Protocol):
         """
         ...
 
-    def get_bookcase(self, wait_time: Optional[
+    def get_bookcase(self, wait_time: Optional[float] = None) -> str:
         """
         Optional: Retrieve the HTML content of the authenticated user's bookcase page.
 
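
A skeletal class satisfying the methods this diff shows for `RequesterProtocol` (the real protocol may declare more members; this is only a conformance sketch, and `@runtime_checkable` isinstance checks verify method presence, not signatures):

    from typing import Literal, Optional

    class DummyRequester:
        def is_async(self) -> Literal[False]:
            return False

        def login(self, max_retries: int = 3, manual_login: bool = False) -> bool:
            return True

        def get_book_info(self, book_id: str, wait_time: Optional[float] = None) -> str:
            return "<html>book info</html>"

        def get_book_chapter(
            self, book_id: str, chapter_id: str, wait_time: Optional[float] = None
        ) -> str:
            return "<html>chapter</html>"

        def get_bookcase(self, wait_time: Optional[float] = None) -> str:
            return "<html>bookcase</html>"

    # isinstance(DummyRequester(), RequesterProtocol) would be True at runtime.
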

novel_downloader/core/parsers/base_parser.py
CHANGED
@@ -45,14 +45,14 @@ class BaseParser(ParserProtocol, abc.ABC):
         self._base_cache_dir = Path(config.cache_dir)
 
     @abc.abstractmethod
-    def parse_book_info(self,
+    def parse_book_info(self, html_str: str) -> Dict[str, Any]:
         """
         Parse a book info page and extract metadata and chapter structure.
 
         Depending on the site structure, the return dict may include a
         flat `chapters` list or nested `volumes` with chapter groups.
 
-        :param
+        :param html_str: Raw HTML of the book info page.
         :return: Parsed metadata and chapter structure as a dictionary.
         """
         ...
@@ -62,7 +62,7 @@ class BaseParser(ParserProtocol, abc.ABC):
         """
         Parse a single chapter page and extract clean text or simplified HTML.
 
-        :param
+        :param html_str: Raw HTML of the chapter page.
         :param chapter_id: Identifier of the chapter being parsed.
         :return: Cleaned chapter content as plain text or minimal HTML.
         """

novel_downloader/core/parsers/common_parser/helper.py
CHANGED
@@ -188,7 +188,7 @@ class HTMLExtractor:
             current = sep.join(current)
 
         elif t == "attr":
-            name = step.get("attr")
+            name = step.get("attr") or ""
             if isinstance(current, list):
                 current = [elem.get(name, "") for elem in current]
             elif isinstance(current, Tag):
@@ -216,9 +216,9 @@ class HTMLExtractor:
         """
         list_selector = volume_rule.get("list_selector")
         volume_selector = volume_rule.get("volume_selector")
-        chapter_selector = volume_rule.get("chapter_selector")
         volume_name_steps = volume_rule.get("volume_name_steps")
-
+        chapter_selector = volume_rule["chapter_selector"]
+        chapter_steps_list = volume_rule["chapter_steps"]
 
         if not (
             list_selector and volume_selector and chapter_selector and volume_name_steps
@@ -241,6 +241,8 @@ class HTMLExtractor:
         for elem in list_area.find_all(
             [volume_selector, chapter_selector], recursive=True
         ):
+            if not isinstance(elem, Tag):
+                continue
             if elem.name == volume_selector:
                 extractor = HTMLExtractor(str(elem))
                 volume_name = extractor.extract_field(volume_name_steps)
@@ -257,9 +259,9 @@ class HTMLExtractor:
         return volumes
 
     def extract_volume_blocks(self, volume_rule: VolumesRules) -> List[Dict[str, Any]]:
-        volume_selector = volume_rule
+        volume_selector = volume_rule.get("volume_selector")
+        volume_name_steps = volume_rule.get("volume_name_steps")
         chapter_selector = volume_rule["chapter_selector"]
-        volume_name_steps = volume_rule["volume_name_steps"]
         chapter_steps_list = volume_rule["chapter_steps"]
         if not (volume_selector and volume_name_steps):
             raise ValueError(

novel_downloader/core/parsers/qidian_parser/browser/main_parser.py
CHANGED
@@ -60,6 +60,8 @@ class QidianBrowserParser(BaseParser):
             use_ocr=config.use_ocr,
             use_vec=config.use_vec,
             batch_size=config.batch_size,
+            gpu_mem=config.gpu_mem,
+            gpu_id=config.gpu_id,
             ocr_weight=config.ocr_weight,
             vec_weight=config.vec_weight,
             font_debug=config.save_font_debug,
@@ -67,14 +69,14 @@ class QidianBrowserParser(BaseParser):
         self._font_debug_dir = self._base_cache_dir / "font_debug"
         self._font_debug_dir.mkdir(parents=True, exist_ok=True)
 
-    def parse_book_info(self,
+    def parse_book_info(self, html_str: str) -> Dict[str, Any]:
         """
         Parse a book info page and extract metadata and chapter structure.
 
-        :param
+        :param html_str: Raw HTML of the book info page.
         :return: Parsed metadata and chapter structure as a dictionary.
         """
-        return parse_book_info(
+        return parse_book_info(html_str)
 
     def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
         """

novel_downloader/core/parsers/qidian_parser/session/main_parser.py
CHANGED
@@ -63,6 +63,8 @@ class QidianSessionParser(BaseParser):
             use_ocr=config.use_ocr,
             use_vec=config.use_vec,
             batch_size=config.batch_size,
+            gpu_mem=config.gpu_mem,
+            gpu_id=config.gpu_id,
             ocr_weight=config.ocr_weight,
             vec_weight=config.vec_weight,
             font_debug=config.save_font_debug,
@@ -70,14 +72,14 @@ class QidianSessionParser(BaseParser):
         self._font_debug_dir = self._base_cache_dir / "font_debug"
         self._font_debug_dir.mkdir(parents=True, exist_ok=True)
 
-    def parse_book_info(self,
+    def parse_book_info(self, html_str: str) -> Dict[str, Any]:
         """
         Parse a book info page and extract metadata and chapter structure.
 
-        :param
+        :param html_str: Raw HTML of the book info page.
         :return: Parsed metadata and chapter structure as a dictionary.
         """
-        return parse_book_info(
+        return parse_book_info(html_str)
 
     def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
         """

novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py
CHANGED
@@ -41,6 +41,58 @@ def _get_volume_name(vol_div: Tag) -> str:
     return text.split(chr(183))[0].strip()
 
 
+def safe_select_text(
+    soup: Tag,
+    selector: str,
+    *,
+    separator: str = "",
+    strip: bool = False,
+    default: str = "",
+) -> str:
+    """
+    Safely select the first element matching a CSS selector and return its text.
+
+    :param soup: A BeautifulSoup Tag or sub-tree to query.
+    :param selector: A CSS selector string.
+    :param separator: Separator to use between strings when joining.
+    :param strip: Whether to strip whitespace from the result.
+    :param default: Value to return if no element is found.
+    :return: The element's text, or `default` if not found.
+    """
+    tag = soup.select_one(selector)
+    return (
+        tag.get_text(separator=separator, strip=strip)
+        if isinstance(tag, Tag)
+        else default
+    )
+
+
+def safe_select_attr(
+    soup: Tag,
+    selector: str,
+    attr: str,
+    *,
+    default: str = "",
+) -> str:
+    """
+    Safely select the first element matching a CSS selector and return one attribute.
+
+    :param soup: A BeautifulSoup Tag or sub-tree to query.
+    :param selector: A CSS selector string.
+    :param attr: The attribute name to retrieve from the selected element.
+    :param default: Value to return if no element or attribute is found.
+    :return: The attribute's value stripped of whitespace, or `default` if not found.
+    """
+    tag = soup.select_one(selector)
+    if isinstance(tag, Tag) and attr in tag.attrs:
+        value = tag.attrs[attr]
+        if isinstance(value, list):
+            return " ".join(value).strip()
+        elif isinstance(value, str):
+            return value.strip()
+    return default
+
+
 def parse_book_info(html_str: str) -> Dict[str, Any]:
     """
     Extract metadata: title, author, cover_url, update_time, status,
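
Usage sketch for the two new helpers (beautifulsoup4 required; the markup is invented for the example):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(
        '<em id="bookName">Book</em><div class="book-img"><img src="/cover.jpg"/></div>',
        "html.parser",
    )
    safe_select_text(soup, "em#bookName", strip=True)      # -> "Book"
    safe_select_text(soup, "a.writer", default="Unknown")  # -> "Unknown" (no match)
    safe_select_attr(soup, "div.book-img img", "src")      # -> "/cover.jpg"
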
@@ -52,27 +104,24 @@ def parse_book_info(html_str: str) -> Dict[str, Any]:
     info: Dict[str, Any] = {}
     try:
         soup = html_to_soup(html_str)
-        info["book_name"] = soup
-        info["author"] = soup
-        info["cover_url"] = soup
+        info["book_name"] = safe_select_text(soup, "em#bookName", strip=True)
+        info["author"] = safe_select_text(soup, "a.writer", strip=True)
+        info["cover_url"] = safe_select_attr(soup, "div.book-img img", "src")
         info["update_time"] = (
-            soup
-            .get_text(strip=True)
+            safe_select_text(soup, "span.book-update-time", strip=True)
             .replace("更新时间", "")
             .strip()
         )
-        info["serial_status"] = soup
-
-
-
-
+        info["serial_status"] = safe_select_text(soup, "span.blue", strip=True)
+
+        # Word count via regex fallback
+        match = re.search(r"<em>([\d.]+)</em>\s*<cite>(.*?)字</cite>", html_str)
+        info["word_count"] = (
+            f"{match.group(1)}{match.group(2)}字" if match else "Unknown"
         )
-
-
-
-        info["word_count"] = "Unknown"
-        info["summary"] = soup.select_one("div.book-intro p").get_text(
-            separator="\n", strip=True
+
+        info["summary"] = safe_select_text(
+            soup, "div.book-intro p", separator="\n", strip=True
         )
         # volumes
         vols = []
@@ -81,11 +130,18 @@ def parse_book_info(html_str: str) -> Dict[str, Any]:
             chaps = []
             for li in vol_div.select("li"):
                 a = li.select_one("a")
+                if not isinstance(a, Tag) or "href" not in a.attrs:
+                    continue
+                href_val = a["href"]
+                if isinstance(href_val, list):
+                    href = href_val[0].strip()
+                else:
+                    href = str(href_val).strip()
                 chaps.append(
                     {
                         "title": a.get_text(strip=True),
-                        "url":
-                        "chapterId": _chapter_url_to_id(
+                        "url": href,
+                        "chapterId": _chapter_url_to_id(href),
                     }
                 )
             vols.append({"volume_name": name, "chapters": chaps})

novel_downloader/core/parsers/qidian_parser/shared/helpers.py
CHANGED
@@ -16,7 +16,7 @@ import json
 import logging
 from typing import Any, Dict, Union
 
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 
 logger = logging.getLogger(__name__)
 
@@ -103,7 +103,7 @@ def find_ssr_page_context(soup: BeautifulSoup) -> Dict[str, Any]:
     """
     try:
         tag = soup.find("script", id="vite-plugin-ssr_pageContext")
-        if tag and tag.string:
+        if isinstance(tag, Tag) and tag.string:
             data: Dict[str, Any] = json.loads(tag.string.strip())
             return data
     except Exception as e:
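
The `isinstance(tag, Tag)` guard matters because bs4's `find()` can also return a `NavigableString` (and is typed that way), so a bare truthiness test leaves the `.string` access unverified for type checkers. Minimal demonstration:

    from bs4 import BeautifulSoup, Tag

    soup = BeautifulSoup(
        '<script id="vite-plugin-ssr_pageContext">{"a": 1}</script>', "html.parser"
    )
    tag = soup.find("script", id="vite-plugin-ssr_pageContext")
    if isinstance(tag, Tag) and tag.string:
        print(tag.string.strip())  # {"a": 1}; tag is known to be a Tag here
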