PyPI - novel-downloader - Versions diffs - 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl - Mend

novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

novel_downloader/core/fetchers/qidian/browser.py CHANGED Viewed

@@ -5,6 +5,7 @@ novel_downloader.core.fetchers.qidian.browser
 """
+import asyncio
 from typing import Any
 from playwright.async_api import Page
@@ -189,18 +190,35 @@ class QidianBrowser(BaseBrowser):
         """
         try:
             page = await self.context.new_page()
-            await page.goto(self.HOMEPAGE_URL, wait_until="networkidle")
             await self._login_auto(page)
             await self._dismiss_overlay(page)
-            sign_in_elem = await page.query_selector(".sign-in")
-            if sign_in_elem and await sign_in_elem.is_visible():
-                self.logger.debug("[auth] Sign-in element visible.")
-                await page.close()
-                return False
-            else:
-                self.logger.debug("[auth] Sign-in element not found.")
-                await page.close()
+            await page.goto(self.HOMEPAGE_URL, wait_until="networkidle")
+            sign_in_elem = await page.query_selector("#login-box .sign-in")
+            sign_out_elem = await page.query_selector("#login-box .sign-out")
+            sign_in_class = (
+                (await sign_in_elem.get_attribute("class") or "")
+                if sign_in_elem
+                else ""
+            )
+            sign_out_class = (
+                (await sign_out_elem.get_attribute("class") or "")
+                if sign_out_elem
+                else ""
+            )
+            sign_in_hidden = "hidden" in sign_in_class
+            sign_out_hidden = "hidden" in sign_out_class
+            await page.close()
+            # if sign_in_visible and not sign_out_visible:
+            if not sign_in_hidden and sign_out_hidden:
+                self.logger.debug("[auth] Detected as logged in.")
                 return True
+            else:
+                self.logger.debug("[auth] Detected as not logged in.")
+                return False
         except Exception as e:
             self.logger.warning("[auth] Error while checking login status: %s", e)
         return False
@@ -220,7 +238,10 @@ class QidianBrowser(BaseBrowser):
             self.logger.debug("[auth] Overlay mask detected; attempting to close.")
-            iframe_element = await page.query_selector('iframe[name="loginIfr"]')
+            iframe_element = await page.wait_for_selector(
+                "#loginIfr",
+                timeout=timeout * 1000,
+            )
             if iframe_element is None:
                 self.logger.debug("[auth] Login iframe not found.")
                 return
@@ -261,6 +282,37 @@ class QidianBrowser(BaseBrowser):
             btn = await page.query_selector("#login-btn")
             if btn and await btn.is_visible():
                 await btn.click()
+                tasks = [
+                    asyncio.create_task(
+                        page.wait_for_selector(
+                            "div.mask",
+                            timeout=timeout * 1000,
+                        )
+                    ),
+                    asyncio.create_task(
+                        page.wait_for_selector(
+                            "div.qdlogin-wrap",
+                            timeout=timeout * 1000,
+                        )
+                    ),
+                    asyncio.create_task(
+                        page.wait_for_url(
+                            lambda url: "login" not in url,
+                            timeout=timeout * 1000,
+                        )
+                    ),
+                ]
+                done, pending = await asyncio.wait(
+                    tasks,
+                    timeout=timeout + 1,
+                    return_when=asyncio.FIRST_COMPLETED,
+                )
+                for task in pending:
+                    task.cancel()
+                if done:
+                    self.logger.debug("[auth] Login flow proceeded after button click.")
+                else:
+                    self.logger.warning("[auth] Timeout waiting for login to proceed.")
         except Exception as e:
             self.logger.debug("[auth] Failed to click login button: %s", e)
         return

novel_downloader/core/fetchers/yamibo/browser.py CHANGED Viewed

@@ -48,8 +48,8 @@ class YamiboBrowser(BaseBrowser):
             return False
         for i in range(1, attempt + 1):
+            login_page = await self.context.new_page()
             try:
-                login_page = await self.context.new_page()
                 await login_page.goto(self.LOGIN_URL, wait_until="networkidle")
                 await login_page.fill("#loginform-username", username)
@@ -68,8 +68,6 @@ class YamiboBrowser(BaseBrowser):
                         f"[auth] No URL change after login attempt {i}: {e}"
                     )
-                await login_page.close()
                 self._is_logged_in = await self._check_login_status()
                 if self._is_logged_in:
                     self.logger.info(f"[auth] Login successful on attempt {i}.")
@@ -83,6 +81,8 @@ class YamiboBrowser(BaseBrowser):
                 self.logger.error(
                     f"[auth] Unexpected error during login attempt {i}: {e}"
                 )
+            finally:
+                await login_page.close()
         self.logger.error(f"[auth] Login failed after {attempt} attempt(s).")
         return False

novel_downloader/core/interfaces/downloader.py CHANGED Viewed

@@ -10,45 +10,46 @@ that outlines the expected behavior of any downloader class.
 from collections.abc import Awaitable, Callable
 from typing import Any, Protocol, runtime_checkable
+from novel_downloader.models import BookConfig
 @runtime_checkable
 class DownloaderProtocol(Protocol):
     """
-    Protocol for fully-asynchronous downloader classes.
+    Protocol for async downloader implementations.
-    Defines the expected interface for any downloader implementation,
-    including both batch and single book downloads,
-    as well as optional pre-download hooks.
+    Uses BookConfig (with book_id, optional start_id/end_id/ignore_ids)
+    for both single and batch downloads.
     """
     async def download(
         self,
-        book_id: str,
+        book: BookConfig,
         *,
         progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
         **kwargs: Any,
     ) -> None:
         """
-        Download logic for a single book.
+        Download a single book.
-        :param book_id: The identifier of the book.
-        :param progress_hook: (optional) Called after each chapter;
+        :param book: BookConfig with at least 'book_id'.
+        :param progress_hook: Optional async callback after each chapter.
                                 args: completed_count, total_count.
         """
         ...
     async def download_many(
         self,
-        book_ids: list[str],
+        books: list[BookConfig],
         *,
         progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
         **kwargs: Any,
     ) -> None:
         """
-        Batch download entry point.
+        Download multiple books.
-        :param book_ids: List of book IDs to download.
-        :param progress_hook: (optional) Called after each chapter;
+        :param books: List of BookConfig entries.
+        :param progress_hook: Optional async callback after each chapter.
                                 args: completed_count, total_count.
         """
         ...

novel_downloader/core/parsers/qidian/chapter_encrypted.py CHANGED Viewed

@@ -19,12 +19,16 @@ from lxml import html
 from novel_downloader.models import ChapterDict
 from novel_downloader.utils.network import download_font_file
-from novel_downloader.utils.text_utils import apply_font_mapping
+from novel_downloader.utils.text_utils import (
+    apply_font_mapping,
+    truncate_half_lines,
+)
 from .utils import (
     extract_chapter_info,
     find_ssr_page_context,
     get_decryptor,
+    is_duplicated,
     vip_status,
 )
@@ -76,6 +80,7 @@ def parse_encrypted_chapter(
         fixedFontWoff2_url = chapter_info["fixedFontWoff2"]
         title = chapter_info.get("chapterName", "Untitled")
+        duplicated = is_duplicated(ssr_data)
         raw_html = chapter_info.get("content", "")
         chapter_id = chapter_info.get("chapterId", chapter_id)
         fkp = chapter_info.get("fkp", "")
@@ -83,7 +88,7 @@ def parse_encrypted_chapter(
         update_time = chapter_info.get("updateTime", "")
         update_timestamp = chapter_info.get("updateTimestamp", 0)
         modify_time = chapter_info.get("modifyTime", 0)
-        word_count = chapter_info.get("wordsCount", 0)
+        word_count = chapter_info.get("actualWords", 0)
         seq = chapter_info.get("seq", None)
         volume = chapter_info.get("extra", {}).get("volumeName", "")
@@ -177,6 +182,9 @@ def parse_encrypted_chapter(
         final_paragraphs_str = "\n\n".join(
             line.strip() for line in original_text.splitlines() if line.strip()
         )
+        if parser._use_truncation and duplicated:
+            final_paragraphs_str = truncate_half_lines(final_paragraphs_str)
         return {
             "id": str(chapter_id),
             "title": str(title),
@@ -187,6 +195,7 @@ def parse_encrypted_chapter(
                 "update_timestamp": update_timestamp,
                 "modify_time": modify_time,
                 "word_count": word_count,
+                "duplicated": duplicated,
                 "seq": seq,
                 "volume": volume,
                 "encrypted": True,

novel_downloader/core/parsers/qidian/chapter_normal.py CHANGED Viewed

@@ -15,11 +15,13 @@ from typing import TYPE_CHECKING
 from lxml import html
 from novel_downloader.models import ChapterDict
+from novel_downloader.utils.text_utils import truncate_half_lines
 from .utils import (
     extract_chapter_info,
     find_ssr_page_context,
     get_decryptor,
+    is_duplicated,
     vip_status,
 )
@@ -51,6 +53,7 @@ def parse_normal_chapter(
             return None
         title = chapter_info.get("chapterName", "Untitled")
+        duplicated = is_duplicated(ssr_data)
         raw_html = chapter_info.get("content", "")
         chapter_id = chapter_info.get("chapterId", chapter_id)
         fkp = chapter_info.get("fkp", "")
@@ -58,7 +61,7 @@ def parse_normal_chapter(
         update_time = chapter_info.get("updateTime", "")
         update_timestamp = chapter_info.get("updateTimestamp", 0)
         modify_time = chapter_info.get("modifyTime", 0)
-        word_count = chapter_info.get("wordsCount", 0)
+        word_count = chapter_info.get("actualWords", 0)
         seq = chapter_info.get("seq", None)
         volume = chapter_info.get("extra", {}).get("volumeName", "")
@@ -74,6 +77,9 @@ def parse_normal_chapter(
             if not chapter_text:
                 return None
+        if parser._use_truncation and duplicated:
+            chapter_text = truncate_half_lines(chapter_text)
         return {
             "id": str(chapter_id),
             "title": title,
@@ -84,6 +90,7 @@ def parse_normal_chapter(
                 "update_timestamp": update_timestamp,
                 "modify_time": modify_time,
                 "word_count": word_count,
+                "duplicated": duplicated,
                 "seq": seq,
                 "volume": volume,
                 "encrypted": False,

novel_downloader/core/parsers/qidian/main_parser.py CHANGED Viewed

@@ -32,7 +32,11 @@ class QidianParser(BaseParser):
     Parser for Qidian site.
     """
-    def __init__(self, config: ParserConfig):
+    def __init__(
+        self,
+        config: ParserConfig,
+        fuid: str = "",
+    ):
         """
         Initialize the QidianParser with the given configuration.
@@ -41,6 +45,7 @@ class QidianParser(BaseParser):
         super().__init__(config)
         # Extract and store parser flags from config
+        self._use_truncation = config.use_truncation
         self._decode_font: bool = config.decode_font
         self._save_font_debug: bool = config.save_font_debug
@@ -52,7 +57,7 @@ class QidianParser(BaseParser):
             DATA_DIR / "qidian" / "browser_state.cookies",
             DATA_DIR / "qidian" / "session_state.cookies",
         ]
-        self._fuid: str = find_cookie_value(state_files, "ywguid")
+        self._fuid: str = fuid or find_cookie_value(state_files, "ywguid")
         self._font_ocr: FontOCR | None = None
         if self._decode_font:

novel_downloader/core/parsers/qidian/utils/__init__.py CHANGED Viewed

@@ -9,6 +9,7 @@ from .helpers import (
     can_view_chapter,
     extract_chapter_info,
     find_ssr_page_context,
+    is_duplicated,
     is_encrypted,
     is_restricted_page,
     vip_status,
@@ -22,6 +23,7 @@ __all__ = [
     "vip_status",
     "can_view_chapter",
     "is_encrypted",
+    "is_duplicated",
     "QidianNodeDecryptor",
     "get_decryptor",
 ]

novel_downloader/core/parsers/qidian/utils/helpers.py CHANGED Viewed

@@ -89,6 +89,15 @@ def can_view_chapter(ssr_data: dict[str, Any]) -> bool:
     return not (vip_status == 1 and is_buy == 0)
+def is_duplicated(ssr_data: dict[str, Any]) -> bool:
+    """
+    Check if chapter is marked as duplicated (eFW = 1).
+    """
+    chapter_info = extract_chapter_info(ssr_data)
+    efw_flag = chapter_info.get("eFW", 0)
+    return bool(efw_flag == 1)
 def is_encrypted(content: str | dict[str, Any]) -> bool:
     """
     Return True if content is encrypted.

novel_downloader/locales/en.json CHANGED Viewed

@@ -66,6 +66,8 @@
   "download_downloading": "Downloading book {book_id} from {site}...",
   "download_prompt_parse": "Parse...",
   "download_book_ids": "One or more book IDs to process",
+  "download_option_start": "Start chapter ID (applies to the first book ID only)",
+  "download_option_end": "End chapter ID (applies to the first book ID only)",
   "login_description": "Description",
   "login_hint": "Hint",
   "login_manual_prompt": ">> Please complete login in your browser and press Enter to continue...",

novel_downloader/locales/zh.json CHANGED Viewed

@@ -66,6 +66,8 @@
   "download_downloading": "正在从 {site} 下载书籍 {book_id}...",
   "download_prompt_parse": "结束...",
   "download_book_ids": "要处理的一个或多个小说 ID",
+  "download_option_start": "起始章节 ID (仅用于第一个书籍 ID)",
+  "download_option_end": "结束章节 ID (仅用于第一个书籍 ID)",
   "login_description": "说明",
   "login_hint": "提示",
   "login_manual_prompt": ">> 请在浏览器中完成登录后按回车继续...",

novel_downloader/models/__init__.py CHANGED Viewed

@@ -8,6 +8,7 @@ novel_downloader.models
 from .browser import NewContextOptions
 from .chapter import ChapterDict
 from .config import (
+    BookConfig,
     DownloaderConfig,
     ExporterConfig,
     FetcherConfig,
@@ -39,6 +40,7 @@ from .types import (
 __all__ = [
     "NewContextOptions",
+    "BookConfig",
     "DownloaderConfig",
     "ParserConfig",
     "FetcherConfig",

novel_downloader/models/config.py CHANGED Viewed

@@ -17,6 +17,7 @@ strongly typed Python objects for safer and cleaner access.
 """
 from dataclasses import dataclass
+from typing import NotRequired, TypedDict
 from .types import (
     BrowserType,
@@ -67,6 +68,7 @@ class DownloaderConfig:
 @dataclass
 class ParserConfig:
     cache_dir: str = "./novel_cache"
+    use_truncation: bool = True
     decode_font: bool = False
     use_freq: bool = False
     use_ocr: bool = True
@@ -98,3 +100,10 @@ class ExporterConfig:
     include_toc: bool = False
     include_picture: bool = False
     split_mode: SplitMode = "book"
+class BookConfig(TypedDict):
+    book_id: str
+    start_id: NotRequired[str]
+    end_id: NotRequired[str]
+    ignore_ids: NotRequired[list[str]]

novel_downloader/resources/config/settings.toml CHANGED Viewed

@@ -52,6 +52,7 @@ book_ids = [
 ]
 mode = "session"                   # browser / session
 login_required = true              # 是否需要登录才能访问
+use_truncation = true              # 是否基于章节长度截断以避免重复内容
 [sites.biquge]  # 笔趣阁
 book_ids = [

novel_downloader/tui/screens/home.py CHANGED Viewed

@@ -65,7 +65,13 @@ class HomeScreen(Screen):  # type: ignore[misc]
                 return
             id_list = {x.strip() for x in ids.split(",") if x.strip()}
             adapter = ConfigAdapter(config=self.app.config, site=str(site))
-            asyncio.create_task(self._download(adapter, str(site), id_list))
+            # asyncio.create_task(self._download(adapter, str(site), id_list))
+            self.run_worker(
+                self._download(adapter, str(site), id_list),
+                name="download",
+                group="downloads",
+                description="正在下载书籍...",
+            )
     def _make_title_bar(self) -> Horizontal:
         return Horizontal(
@@ -106,12 +112,12 @@ class HomeScreen(Screen):  # type: ignore[misc]
         self,
         adapter: ConfigAdapter,
         site: str,
-        valid_book_ids: set[str],
+        book_ids: set[str],
     ) -> None:
         btn = self.query_one("#download", Button)
         btn.disabled = True
         try:
-            logging.info(f"下载请求: {site} | {valid_book_ids}")
+            logging.info(f"下载请求: {site} | {book_ids}")
             downloader_cfg = adapter.get_downloader_config()
             fetcher_cfg = adapter.get_fetcher_config()
             parser_cfg = adapter.get_parser_config()
@@ -134,16 +140,17 @@ class HomeScreen(Screen):  # type: ignore[misc]
                 downloader = get_downloader(
                     fetcher=fetcher,
                     parser=parser,
-                    exporter=exporter,
                     site=site,
                     config=downloader_cfg,
                 )
-                for book_id in valid_book_ids:
+                for book_id in book_ids:
                     logging.info(t("download_downloading", book_id=book_id, site=site))
                     await downloader.download(
-                        book_id, progress_hook=self._update_progress
+                        {"book_id": book_id},
+                        progress_hook=self._update_progress,
                     )
+                    await asyncio.to_thread(exporter.export, book_id)
                 if downloader_cfg.login_required and fetcher.is_logged_in:
                     await fetcher.save_state()

novel_downloader/utils/constants.py CHANGED Viewed

@@ -116,35 +116,6 @@ QD_DECRYPT_SCRIPT_PATH = files("novel_downloader.resources.js_scripts").joinpath
 # Text Files
 BLACKLIST_PATH = files("novel_downloader.resources.text").joinpath("blacklist.txt")
-# -----------------------------------------------------------------------------
-# EPUB defaults
-# -----------------------------------------------------------------------------
-EPUB_IMAGE_FOLDER = "Images"
-EPUB_TEXT_FOLDER = "Text"
-EPUB_IMAGE_WRAPPER = (
-    '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
-)
-EPUB_OPTIONS = {
-    # guide 是 EPUB 2 的一个部分, 包含封面, 目录, 索引等重要导航信息
-    "epub2_guide": True,
-    # landmark 是 EPUB 3 用来标识重要页面 (如目录, 封面, 起始页) 的 <nav> 结构
-    "epub3_landmark": True,
-    # EPUB 3 允许提供一个 page list, 让电子书在不同设备上仍然保持相对一致的分页结构
-    "epub3_pages": True,
-    # 这个名字会出现在 EPUB 阅读器的导航栏
-    "landmark_title": "Guide",
-    # 这个名字会显示在 EPUB 阅读器的分页导航栏
-    "pages_title": "Pages",
-    # 是否根据 book.spine 的排列顺序自动设置 EPUB 阅读器的 page-progression-direction
-    "spine_direction": True,
-    # 控制 EPUB 阅读器的默认翻页方向 (LTR 或 RTL)
-    "package_direction": False,
-    # 是否为 EPUB 书籍中的章节 添加播放顺序
-    "play_order": {"enabled": True, "start_from": 1},
-}
 # ---------------------------------------------------------------------
 # Pretrained model registry (e.g. used in font recovery or OCR)
 # ---------------------------------------------------------------------

novel_downloader/utils/{model_loader.py → fontocr/model_loader.py} RENAMED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
-novel_downloader.utils.model_loader
------------------------------------
+novel_downloader.utils.fontocr.model_loader
+-------------------------------------------
 Utility functions for managing pre-trained model downloads.

novel_downloader/utils/fontocr/ocr_v1.py CHANGED Viewed

@@ -25,7 +25,8 @@ from novel_downloader.utils.constants import (
     REC_IMAGE_SHAPE_MAP,
 )
 from novel_downloader.utils.hash_store import img_hash_store
-from novel_downloader.utils.model_loader import get_rec_chinese_char_model_dir
+from .model_loader import get_rec_chinese_char_model_dir
 logger = logging.getLogger(__name__)

novel_downloader/utils/fontocr/ocr_v2.py CHANGED Viewed

@@ -36,7 +36,8 @@ from novel_downloader.utils.constants import (
     REC_IMAGE_SHAPE_MAP,
 )
 from novel_downloader.utils.hash_store import img_hash_store
-from novel_downloader.utils.model_loader import (
+from .model_loader import (
     get_rec_char_vector_dir,
     get_rec_chinese_char_model_dir,
 )

novel_downloader/utils/text_utils/__init__.py CHANGED Viewed

@@ -15,12 +15,19 @@ Submodules:
 from .chapter_formatting import format_chapter
 from .diff_display import diff_inline_display
 from .font_mapping import apply_font_mapping
-from .text_cleaning import clean_chapter_title, is_promotional_line
+from .text_cleaning import (
+    clean_chapter_title,
+    content_prefix,
+    is_promotional_line,
+    truncate_half_lines,
+)
 __all__ = [
     "apply_font_mapping",
     "format_chapter",
     "clean_chapter_title",
     "is_promotional_line",
+    "content_prefix",
+    "truncate_half_lines",
     "diff_inline_display",
 ]

novel_downloader/utils/text_utils/text_cleaning.py CHANGED Viewed

@@ -6,6 +6,7 @@ novel_downloader.utils.text_utils.text_cleaning
 Tools for detecting and removing promotional or ad-like content from text.
 """
+import math
 import re
 from novel_downloader.utils.file_utils.io import load_blacklisted_words
@@ -50,7 +51,57 @@ def is_promotional_line(line: str) -> bool:
     return False
+def content_prefix(
+    text: str,
+    n: int,
+    ignore_chars: set[str] | None = None,
+) -> str:
+    """
+    Return the prefix of `text` containing the first `n` non-ignored characters.
+    :param text: The full input string.
+    :param n: Number of content characters to include.
+    :param ignore_chars: Characters to ignore when counting content.
+    :return: Truncated string preserving original whitespace and line breaks.
+    """
+    ignore = ignore_chars or set()
+    cnt = 0
+    for i, ch in enumerate(text):
+        if ch not in ignore:
+            cnt += 1
+            if cnt >= n:
+                return text[: i + 1]
+    return text
+def truncate_half_lines(text: str) -> str:
+    """
+    Keep the first half of the lines (rounded up), preserving line breaks.
+    :param text: Full input text
+    :return: Truncated text with first half of lines
+    """
+    lines = text.splitlines()
+    non_empty_lines = [line for line in lines if line.strip()]
+    keep_count = math.ceil(len(non_empty_lines) / 2)
+    result_lines = []
+    count = 0
+    for line in lines:
+        result_lines.append(line)
+        if line.strip():
+            count += 1
+        if count >= keep_count:
+            break
+    return "\n".join(result_lines)
 __all__ = [
     "clean_chapter_title",
     "is_promotional_line",
+    "content_prefix",
+    "truncate_half_lines",
 ]

novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl

novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl