PyPI - novel-downloader - Versions diffs - 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl - Mend

novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129) hide show

novel_downloader/core/parsers/{qidian_parser → qidian}/browser/main_parser.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.browser.main_parser
----------------------------------------------------------------
+novel_downloader.core.parsers.qidian.browser.main_parser
+--------------------------------------------------------
 Main parser class for handling Qidian chapters rendered via a browser environment.
@@ -13,10 +12,11 @@ content extracted from dynamically rendered Qidian HTML pages.
 from __future__ import annotations
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import TYPE_CHECKING, Any
 from novel_downloader.config.models import ParserConfig
-from novel_downloader.core.parsers.base_parser import BaseParser
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.utils.chapter_storage import ChapterDict
 from ..shared import (
     is_encrypted,
@@ -47,9 +47,9 @@ class QidianBrowserParser(BaseParser):
         self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
         self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
-        self._font_debug_dir: Optional[Path] = None
+        self._font_debug_dir: Path | None = None
-        self._font_ocr: Optional[FontOCR] = None
+        self._font_ocr: FontOCR | None = None
         if self._decode_font:
             from novel_downloader.utils.fontocr import FontOCR
@@ -66,19 +66,23 @@ class QidianBrowserParser(BaseParser):
                 vec_weight=config.vec_weight,
                 font_debug=config.save_font_debug,
             )
-            self._font_debug_dir = self._base_cache_dir / "font_debug"
+            self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
             self._font_debug_dir.mkdir(parents=True, exist_ok=True)
-    def parse_book_info(self, html: str) -> Dict[str, Any]:
+    def parse_book_info(self, html_str: str) -> dict[str, Any]:
         """
         Parse a book info page and extract metadata and chapter structure.
-        :param html: Raw HTML of the book info page.
+        :param html_str: Raw HTML of the book info page.
         :return: Parsed metadata and chapter structure as a dictionary.
         """
-        return parse_book_info(html)
+        return parse_book_info(html_str)
-    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
+    def parse_chapter(
+        self,
+        html_str: str,
+        chapter_id: str,
+    ) -> ChapterDict | None:
         """
         :param html: Raw HTML of the chapter page.
         :param chapter_id: Identifier of the chapter being parsed.

novel_downloader/core/parsers/{qidian_parser → qidian}/session/__init__.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.session
----------------------------------------------------------------
+novel_downloader.core.parsers.qidian.session
+--------------------------------------------
 This package provides parsing components for handling Qidian
 pages that have been rendered by a session.

novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_encrypted.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.session.chapter_encrypted
----------------------------------------------------------------------
+novel_downloader.core.parsers.qidian.session.chapter_encrypted
+--------------------------------------------------------------
 Support for parsing encrypted chapters from Qidian using font OCR mapping,
 CSS rules, and custom rendering logic.
@@ -19,11 +18,12 @@ from __future__ import annotations
 import json
 import logging
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any
 import tinycss2
 from bs4 import BeautifulSoup, Tag
+from novel_downloader.utils.chapter_storage import ChapterDict
 from novel_downloader.utils.network import download_font_file
 from novel_downloader.utils.text_utils import apply_font_mapping
@@ -40,7 +40,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 IGNORED_CLASS_LISTS = {"title", "review"}
-_decryptor: Optional[QidianNodeDecryptor] = None
+_decryptor: QidianNodeDecryptor | None = None
 def _get_decryptor() -> QidianNodeDecryptor:
@@ -58,7 +58,7 @@ def parse_encrypted_chapter(
     soup: BeautifulSoup,
     chapter_id: str,
     fuid: str,
-) -> Dict[str, Any]:
+) -> ChapterDict | None:
     """
     Extract and return the formatted textual content of an encrypted chapter.
@@ -75,15 +75,15 @@ def parse_encrypted_chapter(
     """
     try:
         if not (parser._decode_font and parser._font_ocr):
-            return {}
+            return None
         ssr_data = find_ssr_page_context(soup)
         chapter_info = extract_chapter_info(ssr_data)
         if not chapter_info:
             logger.warning(
                 "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
             )
-            return {}
-        debug_base_dir: Optional[Path] = None
+            return None
+        debug_base_dir: Path | None = None
         if parser._font_debug_dir:
             debug_base_dir = parser._font_debug_dir / chapter_id
             debug_base_dir.mkdir(parents=True, exist_ok=True)
@@ -101,15 +101,12 @@ def parse_encrypted_chapter(
         update_timestamp = chapter_info.get("updateTimestamp", 0)
         modify_time = chapter_info.get("modifyTime", 0)
         word_count = chapter_info.get("wordsCount", 0)
-        vip = bool(chapter_info.get("vipStatus", 0))
-        is_buy = bool(chapter_info.get("isBuy", 0))
         seq = chapter_info.get("seq", None)
-        order = chapter_info.get("chapterOrder", None)
         volume = chapter_info.get("extra", {}).get("volumeName", "")
         if not raw_html:
             logger.warning("[Parser] raw_html not found for chapter '%s'", chapter_id)
-            return {}
+            return None
         # extract + save font
         rf = json.loads(randomFont_str)
@@ -136,7 +133,7 @@ def parse_encrypted_chapter(
                 )
             except Exception as e:
                 logger.error("[Parser] decryption failed for '%s': %s", chapter_id, e)
-                return {}
+                return None
         main_paragraphs = extract_paragraphs_recursively(html_to_soup(raw_html))
         if debug_base_dir:
             main_paragraphs_path = debug_base_dir / "main_paragraphs_debug.json"
@@ -159,7 +156,7 @@ def parse_encrypted_chapter(
             paragraphs_str_path.write_text(paragraphs_str, encoding="utf-8")
         # Run OCR + fallback mapping
-        char_set = set(c for c in paragraphs_str if c not in {" ", "\n", "\u3000"})
+        char_set = {c for c in paragraphs_str if c not in {" ", "\n", "\u3000"}}
         refl_set = set(refl_list)
         char_set = char_set - refl_set
         if debug_base_dir:
@@ -190,33 +187,31 @@ def parse_encrypted_chapter(
         final_paragraphs_str = "\n\n".join(
             line.strip() for line in original_text.splitlines() if line.strip()
         )
-        chapter_info = {
+        return {
             "id": str(chapter_id),
-            "title": title,
+            "title": str(title),
             "content": final_paragraphs_str,
-            "author_say": author_say.strip() if author_say else "",
-            "updated_at": update_time,
-            "update_timestamp": update_timestamp,
-            "modify_time": modify_time,
-            "word_count": word_count,
-            "vip": vip,
-            "purchased": is_buy,
-            "order": order,
-            "seq": seq,
-            "volume": volume,
+            "extra": {
+                "author_say": author_say.strip() if author_say else "",
+                "updated_at": update_time,
+                "update_timestamp": update_timestamp,
+                "modify_time": modify_time,
+                "word_count": word_count,
+                "seq": seq,
+                "volume": volume,
+            },
         }
-        return chapter_info
     except Exception as e:
         logger.warning(
             "[Parser] parse error for encrypted chapter '%s': %s", chapter_id, e
         )
-    return {}
+    return None
 def extract_paragraphs_recursively(
     soup: BeautifulSoup, chapter_id: int = -1
-) -> List[Dict[str, Any]]:
+) -> list[dict[str, Any]]:
     """
     Extracts paragraph elements under <main id="c-{chapter_id}"> from HTML
     and converts them to a nested data structure for further processing.
@@ -227,7 +222,7 @@ def extract_paragraphs_recursively(
     :return list: List of parsed <p> paragraph data.
     """
-    def parse_element(elem: Any) -> Union[Dict[str, Any], None]:
+    def parse_element(elem: Any) -> dict[str, Any] | None:
         if not isinstance(elem, Tag):
             return None
         result = {"tag": elem.name, "attrs": dict(elem.attrs), "data": []}
@@ -245,7 +240,7 @@ def extract_paragraphs_recursively(
     if chapter_id > 0:
         main_id = f"c-{chapter_id}"
         main_tag = soup.find("main", id=main_id)
-        if not main_tag:
+        if not isinstance(main_tag, Tag):
             return []
     else:
         main_tag = soup
@@ -259,7 +254,7 @@ def extract_paragraphs_recursively(
     return result
-def parse_rule(css_str: str) -> Dict[str, Any]:
+def parse_rule(css_str: str) -> dict[str, Any]:
     """
     Parse a CSS string and extract style rules for rendering.
@@ -274,7 +269,7 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
     :return: Dict with "rules" and "orders" for rendering.
     """
-    rules: Dict[str, Any] = {}
+    rules: dict[str, Any] = {}
     orders = []
     stylesheet = tinycss2.parse_stylesheet(
@@ -339,8 +334,8 @@ def parse_rule(css_str: str) -> Dict[str, Any]:
 def render_paragraphs(
-    main_paragraphs: List[Dict[str, Any]], rules: Dict[str, Any]
-) -> Tuple[str, List[str]]:
+    main_paragraphs: list[dict[str, Any]], rules: dict[str, Any]
+) -> tuple[str, list[str]]:
     """
     Applies the parsed CSS rules to the paragraph structure and
     reconstructs the visible text.
@@ -358,11 +353,11 @@ def render_paragraphs(
         - A reconstructed paragraph string with line breaks.
         - A list of mirrored (reflected) characters for later OCR processing.
     """
-    orders: List[Tuple[str, str]] = rules.get("orders", [])
+    orders: list[tuple[str, str]] = rules.get("orders", [])
     rules = rules.get("rules", {})
-    refl_list: List[str] = []
+    refl_list: list[str] = []
-    def apply_rule(data: Dict[str, Any], rule: Dict[str, Any]) -> str:
+    def apply_rule(data: dict[str, Any], rule: dict[str, Any]) -> str:
         if rule.get("delete-all", False):
             return ""
@@ -373,10 +368,7 @@ def render_paragraphs(
                 curr_str += first_data
         if rule.get("delete-first", False):
-            if len(curr_str) <= 1:
-                curr_str = ""
-            else:
-                curr_str = curr_str[1:]
+            curr_str = "" if len(curr_str) <= 1 else curr_str[1:]
         curr_str += rule.get("append-end-char", "")
@@ -433,7 +425,7 @@ def render_paragraphs(
                     logger.debug(f"[parser] not find p_class_str: {class_list}")
                     continue
                 # 普通标签处理，根据 orders 顺序匹配
-                for ord_selector, ord_id in orders:
+                for ord_selector, _ in orders:
                     tag_name = f"{ord_selector}"
                     if data.get("tag") != tag_name:
                         continue
@@ -442,7 +434,7 @@ def render_paragraphs(
                     ordered_cache[ord_selector] = apply_rule(data, curr_rule)
                     break
         # 最后按 orders 顺序拼接
-        for ord_selector, ord_id in orders:
+        for ord_selector, _ in orders:
             if ord_selector in ordered_cache:
                 paragraphs_str += ordered_cache[ord_selector]

novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_normal.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.session.chapter_normal
-------------------------------------------------------------------
+novel_downloader.core.parsers.qidian.session.chapter_normal
+-----------------------------------------------------------
 Provides `parse_normal_chapter`, which will:
@@ -12,10 +11,11 @@ Provides `parse_normal_chapter`, which will:
 """
 import logging
-from typing import Any, Dict, Optional
 from bs4 import BeautifulSoup
+from novel_downloader.utils.chapter_storage import ChapterDict
 from ..shared import (
     extract_chapter_info,
     find_ssr_page_context,
@@ -25,7 +25,7 @@ from ..shared import (
 from .node_decryptor import QidianNodeDecryptor
 logger = logging.getLogger(__name__)
-_decryptor: Optional[QidianNodeDecryptor] = None
+_decryptor: QidianNodeDecryptor | None = None
 def _get_decryptor() -> QidianNodeDecryptor:
@@ -42,7 +42,7 @@ def parse_normal_chapter(
     soup: BeautifulSoup,
     chapter_id: str,
     fuid: str,
-) -> Dict[str, Any]:
+) -> ChapterDict | None:
     """
     Extract structured chapter info from a normal Qidian page.
@@ -58,7 +58,7 @@ def parse_normal_chapter(
             logger.warning(
                 "[Parser] ssr_chapterInfo not found for chapter '%s'", chapter_id
             )
-            return {}
+            return None
         title = chapter_info.get("chapterName", "Untitled")
         raw_html = chapter_info.get("content", "")
@@ -69,15 +69,12 @@ def parse_normal_chapter(
         update_timestamp = chapter_info.get("updateTimestamp", 0)
         modify_time = chapter_info.get("modifyTime", 0)
         word_count = chapter_info.get("wordsCount", 0)
-        vip = bool(chapter_info.get("vipStatus", 0))
-        is_buy = bool(chapter_info.get("isBuy", 0))
         seq = chapter_info.get("seq", None)
-        order = chapter_info.get("chapterOrder", None)
         volume = chapter_info.get("extra", {}).get("volumeName", "")
         if not raw_html:
             logger.warning("[Parser] raw_html not found for chapter '%s'", chapter_id)
-            return {}
+            return None
         if vip_status(soup):
             try:
@@ -90,7 +87,7 @@ def parse_normal_chapter(
                 )
             except Exception as e:
                 logger.error("[Parser] decryption failed for '%s': %s", chapter_id, e)
-                return {}
+                return None
         paras_soup = html_to_soup(raw_html)
         paras = [p.get_text(strip=True) for p in paras_soup.find_all("p")]
@@ -100,20 +97,19 @@ def parse_normal_chapter(
             "id": str(chapter_id),
             "title": title,
             "content": chapter_text,
-            "author_say": author_say.strip() if author_say else "",
-            "updated_at": update_time,
-            "update_timestamp": update_timestamp,
-            "modify_time": modify_time,
-            "word_count": word_count,
-            "vip": vip,
-            "purchased": is_buy,
-            "order": order,
-            "seq": seq,
-            "volume": volume,
+            "extra": {
+                "author_say": author_say.strip() if author_say else "",
+                "updated_at": update_time,
+                "update_timestamp": update_timestamp,
+                "modify_time": modify_time,
+                "word_count": word_count,
+                "seq": seq,
+                "volume": volume,
+            },
         }
     except Exception as e:
         logger.warning(
             "[Parser] parse error for normal chapter '%s': %s", chapter_id, e
         )
-        return {}
+    return None

novel_downloader/core/parsers/{qidian_parser → qidian}/session/chapter_router.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.session.chapter_router
-------------------------------------------------------------------
+novel_downloader.core.parsers.qidian.session.chapter_router
+-----------------------------------------------------------
 Routing logic for selecting the correct chapter parser for Qidian session pages.
 """
@@ -10,7 +9,9 @@ Routing logic for selecting the correct chapter parser for Qidian session pages.
 from __future__ import annotations
 import logging
-from typing import TYPE_CHECKING, Any, Dict
+from typing import TYPE_CHECKING
+from novel_downloader.utils.chapter_storage import ChapterDict
 from ..shared import (
     can_view_chapter,
@@ -29,7 +30,7 @@ def parse_chapter(
     parser: QidianSessionParser,
     html_str: str,
     chapter_id: str,
-) -> Dict[str, Any]:
+) -> ChapterDict | None:
     """
     Extract and return the formatted textual content of chapter.
@@ -45,11 +46,11 @@ def parse_chapter(
             logger.warning(
                 "[Parser] Chapter '%s' is not purchased or inaccessible.", chapter_id
             )
-            return {}
+            return None
         if is_encrypted(soup):
             if not parser._decode_font:
-                return {}
+                return None
             try:
                 from .chapter_encrypted import parse_encrypted_chapter
@@ -59,9 +60,9 @@ def parse_chapter(
                     "[Parser] Encrypted chapter '%s' requires extra dependencies.",
                     chapter_id,
                 )
-                return {}
+                return None
         return parse_normal_chapter(soup, chapter_id, parser._fuid)
     except Exception as e:
         logger.warning("[Parser] parse error for chapter '%s': %s", chapter_id, e)
-        return {}
+    return None

novel_downloader/core/parsers/{qidian_parser → qidian}/session/main_parser.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.session.main_parser
----------------------------------------------------------------
+novel_downloader.core.parsers.qidian.session.main_parser
+--------------------------------------------------------
 Main parser class for handling Qidian chapters rendered via a session.
@@ -13,10 +12,11 @@ content extracted from dynamically rendered Qidian HTML pages.
 from __future__ import annotations
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import TYPE_CHECKING, Any
 from novel_downloader.config.models import ParserConfig
-from novel_downloader.core.parsers.base_parser import BaseParser
+from novel_downloader.core.parsers.base import BaseParser
+from novel_downloader.utils.chapter_storage import ChapterDict
 from novel_downloader.utils.state import state_mgr
 from ..shared import (
@@ -48,12 +48,12 @@ class QidianSessionParser(BaseParser):
         self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
         self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
-        self._font_debug_dir: Optional[Path] = None
+        self._font_debug_dir: Path | None = None
         qd_cookies = state_mgr.get_cookies("qidian")
         self._fuid: str = qd_cookies.get("ywguid", "")
-        self._font_ocr: Optional[FontOCR] = None
+        self._font_ocr: FontOCR | None = None
         if self._decode_font:
             from novel_downloader.utils.fontocr import FontOCR
@@ -69,19 +69,23 @@ class QidianSessionParser(BaseParser):
                 vec_weight=config.vec_weight,
                 font_debug=config.save_font_debug,
             )
-            self._font_debug_dir = self._base_cache_dir / "font_debug"
+            self._font_debug_dir = self._base_cache_dir / "qidian" / "font_debug"
             self._font_debug_dir.mkdir(parents=True, exist_ok=True)
-    def parse_book_info(self, html: str) -> Dict[str, Any]:
+    def parse_book_info(self, html_str: str) -> dict[str, Any]:
         """
         Parse a book info page and extract metadata and chapter structure.
-        :param html: Raw HTML of the book info page.
+        :param html_str: Raw HTML of the book info page.
         :return: Parsed metadata and chapter structure as a dictionary.
         """
-        return parse_book_info(html)
+        return parse_book_info(html_str)
-    def parse_chapter(self, html_str: str, chapter_id: str) -> Dict[str, Any]:
+    def parse_chapter(
+        self,
+        html_str: str,
+        chapter_id: str,
+    ) -> ChapterDict | None:
         """
         :param html: Raw HTML of the chapter page.
         :param chapter_id: Identifier of the chapter being parsed.

novel_downloader/core/parsers/{qidian_parser → qidian}/session/node_decryptor.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.session.node_decryptor
-------------------------------------------------------------------
+novel_downloader.core.parsers.qidian.session.node_decryptor
+-----------------------------------------------------------
 Provides QidianNodeDecryptor, which ensures a Node.js environment,
 downloads or installs the required JS modules (Fock + decrypt script),
@@ -15,7 +14,6 @@ import shutil
 import subprocess
 import uuid
 from pathlib import Path
-from typing import Union
 from novel_downloader.utils.constants import (
     JS_SCRIPT_DIR,
@@ -68,7 +66,7 @@ class QidianNodeDecryptor:
         """
         # 1) Check Node.js
         if not shutil.which("node"):
-            raise EnvironmentError("Node.js is not installed or not in PATH.")
+            raise OSError("Node.js is not installed or not in PATH.")
         # 2) Copy bundled decrypt script into place if missing
         if not self.QIDIAN_DECRYPT_SCRIPT_PATH.exists():
@@ -102,8 +100,8 @@ class QidianNodeDecryptor:
     def decrypt(
         self,
-        ciphertext: Union[str, bytes],
-        chapter_id: Union[str, int],
+        ciphertext: str | bytes,
+        chapter_id: str | int,
         fkp: str,
         fuid: str,
     ) -> str:
@@ -120,7 +118,7 @@ class QidianNodeDecryptor:
         # Normalize inputs
         cipher_str = (
             ciphertext.decode("utf-8")
-            if isinstance(ciphertext, (bytes, bytearray))
+            if isinstance(ciphertext, (bytes | bytearray))
             else str(ciphertext)
         )
         chapter_str = str(chapter_id)
@@ -146,8 +144,7 @@ class QidianNodeDecryptor:
             proc = subprocess.run(
                 ["node", self.script_path.name, input_path.name, output_path.name],
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
+                capture_output=True,
                 text=True,
                 cwd=str(self.script_dir),
             )

novel_downloader/core/parsers/{qidian_parser → qidian}/shared/__init__.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
-novel_downloader.core.parsers.qidian_parser.shared
---------------------------------------------------
+novel_downloader.core.parsers.qidian.shared
+-------------------------------------------
 Shared parsing utilities for Qidian parser components.

novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

novel-downloader 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl