PyPI - novel-downloader - Versions diffs - 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl - Mend

novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

novel_downloader/core/downloaders/qidian.py CHANGED Viewed

@@ -13,11 +13,11 @@ from typing import Any, cast
 from novel_downloader.core.downloaders.base import BaseDownloader
 from novel_downloader.core.interfaces import (
-    ExporterProtocol,
     FetcherProtocol,
     ParserProtocol,
 )
 from novel_downloader.models import (
+    BookConfig,
     ChapterDict,
     CidTask,
     DownloaderConfig,
@@ -40,15 +40,14 @@ class QidianDownloader(BaseDownloader):
         self,
         fetcher: FetcherProtocol,
         parser: ParserProtocol,
-        exporter: ExporterProtocol,
         config: DownloaderConfig,
     ):
         config.request_interval = max(1.0, config.request_interval)
-        super().__init__(fetcher, parser, exporter, config, "qidian")
+        super().__init__(fetcher, parser, config, "qidian")
     async def _download_one(
         self,
-        book_id: str,
+        book: BookConfig,
         *,
         progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
         **kwargs: Any,
@@ -56,9 +55,13 @@ class QidianDownloader(BaseDownloader):
         """
         The full download logic for a single book.
-        :param book_id: The identifier of the book to download.
+        :param book: BookConfig with at least 'book_id'.
         """
         TAG = "[Downloader]"
+        book_id = book["book_id"]
+        start_id = book.get("start_id")
+        end_id = book.get("end_id")
+        ignore_set = set(book.get("ignore_ids", []))
         raw_base = self.raw_data_dir / book_id
         cache_base = self.cache_dir / book_id
@@ -140,6 +143,10 @@ class QidianDownloader(BaseDownloader):
                     cid_queue.task_done()
                     continue
+                if cid in ignore_set:
+                    cid_queue.task_done()
+                    continue
                 try:
                     html_list = await self.fetcher.get_book_chapter(book_id, cid)
                     await html_queue.put(
@@ -194,40 +201,39 @@ class QidianDownloader(BaseDownloader):
                 skip_retry = False
                 try:
                     chap_json: ChapterDict | None = None
-                    if self.is_restricted_page(task.html_list):
+                    if self.check_restricted(task.html_list):
                         self.logger.info(
                             "[Parser] Skipped restricted page for cid %s", task.cid
                         )
                         skip_retry = True
-                    else:
-                        chap_json = await asyncio.to_thread(
-                            self.parser.parse_chapter,
-                            task.html_list,
+                        raise ValueError("Restricted content detected")
+                    is_encrypted = self.check_encrypted(task.html_list)
+                    chap_json = await asyncio.to_thread(
+                        self.parser.parse_chapter,
+                        task.html_list,
+                        task.cid,
+                    )
+                    if is_encrypted:
+                        skip_retry = True
+                    if self.save_html:
+                        folder = chapters_html_dir / (
+                            "html_encrypted" if is_encrypted else "html_plain"
+                        )
+                        html_path = folder / f"{task.cid}.html"
+                        save_as_txt(task.html_list[0], html_path, on_exist="skip")
+                        self.logger.debug(
+                            "%s Saved raw HTML for chapter %s to %s",
+                            TAG,
                             task.cid,
+                            html_path,
                         )
-                    if self.check_encrypted(task.html_list):
-                        skip_retry = True
                     if chap_json:
                         await save_queue.put(chap_json)
                         self.logger.info(
                             "[Parser] saved chapter %s",
                             task.cid,
                         )
-                        if self.save_html:
-                            is_encrypted = chap_json.get("extra", {}).get(
-                                "encrypted", False
-                            )
-                            folder = chapters_html_dir / (
-                                "html_encrypted" if is_encrypted else "html_plain"
-                            )
-                            html_path = folder / f"{task.cid}.html"
-                            save_as_txt(task.html_list[0], html_path, on_exist="skip")
-                            self.logger.debug(
-                                "%s Saved raw HTML for chapter %s to %s",
-                                TAG,
-                                task.cid,
-                                html_path,
-                            )
                     else:
                         raise ValueError("Empty parse result")
                 except Exception as e:
@@ -296,20 +302,40 @@ class QidianDownloader(BaseDownloader):
             )
         )
-        last_cid: str | None = None
+        found_start = start_id is None
+        stop_early = False
         for vol in book_info.get("volumes", []):
             chapters = vol.get("chapters", [])
             for chap in chapters:
+                if stop_early:
+                    break
                 cid = chap.get("chapterId")
-                if cid and normal_cs.exists(cid) and self.skip_existing:
+                if not cid:
+                    continue
+                if not found_start:
+                    if cid == start_id:
+                        found_start = True
+                    else:
+                        completed_count += 1
+                        continue
+                if end_id is not None and cid == end_id:
+                    stop_early = True
+                if cid in ignore_set:
+                    continue
+                if normal_cs.exists(cid) and self.skip_existing:
                     completed_count += 1
-                    if progress_hook:
-                        await progress_hook(completed_count, total_chapters)
-                    last_cid = cid
                     continue
-                await cid_queue.put(CidTask(cid=cid, prev_cid=last_cid))
-                last_cid = cid
+                await cid_queue.put(CidTask(cid=cid, prev_cid=None))
+            if stop_early:
+                break
         await cid_queue.join()
         await html_queue.join()
@@ -323,8 +349,6 @@ class QidianDownloader(BaseDownloader):
         normal_cs.close()
         encrypted_cs.close()
-        await asyncio.to_thread(self.exporter.export, book_id)
         self.logger.info(
             "%s Novel '%s' download completed.",
             TAG,
@@ -333,7 +357,7 @@ class QidianDownloader(BaseDownloader):
         return
     @staticmethod
-    def is_restricted_page(html_list: list[str]) -> bool:
+    def check_restricted(html_list: list[str]) -> bool:
         """
         Return True if page content indicates access restriction
         (e.g. not subscribed/purchased).

novel_downloader/core/downloaders/sfacg.py CHANGED Viewed

@@ -7,7 +7,6 @@ novel_downloader.core.downloaders.sfacg
 from novel_downloader.core.downloaders.common import CommonDownloader
 from novel_downloader.core.interfaces import (
-    ExporterProtocol,
     FetcherProtocol,
     ParserProtocol,
 )
@@ -21,7 +20,6 @@ class SfacgDownloader(CommonDownloader):
         self,
         fetcher: FetcherProtocol,
         parser: ParserProtocol,
-        exporter: ExporterProtocol,
         config: DownloaderConfig,
     ):
-        super().__init__(fetcher, parser, exporter, config, "sfacg")
+        super().__init__(fetcher, parser, config, "sfacg")

novel_downloader/core/downloaders/yamibo.py CHANGED Viewed

@@ -7,7 +7,6 @@ novel_downloader.core.downloaders.yamibo
 from novel_downloader.core.downloaders.common import CommonDownloader
 from novel_downloader.core.interfaces import (
-    ExporterProtocol,
     FetcherProtocol,
     ParserProtocol,
 )
@@ -21,7 +20,6 @@ class YamiboDownloader(CommonDownloader):
         self,
         fetcher: FetcherProtocol,
         parser: ParserProtocol,
-        exporter: ExporterProtocol,
         config: DownloaderConfig,
     ):
-        super().__init__(fetcher, parser, exporter, config, "yamibo")
+        super().__init__(fetcher, parser, config, "yamibo")

novel_downloader/core/exporters/common/epub.py CHANGED Viewed

@@ -8,25 +8,19 @@ Contains the logic for exporting novel content as a single `.epub` file.
 from __future__ import annotations
+import html
 import json
+import re
 from pathlib import Path
 from typing import TYPE_CHECKING
-from ebooklib import epub
-from novel_downloader.core.exporters.epub_utils import (
-    add_images_from_dir,
-    chapter_txt_to_html,
-    create_css_items,
-    create_volume_intro,
-    generate_book_intro_html,
-    init_epub,
-    inline_remote_images,
-)
-from novel_downloader.utils.constants import (
-    EPUB_OPTIONS,
-    EPUB_TEXT_FOLDER,
+from novel_downloader.core.exporters.epub_util import (
+    Book,
+    Chapter,
+    StyleSheet,
+    Volume,
 )
+from novel_downloader.utils.constants import CSS_MAIN_PATH
 from novel_downloader.utils.file_utils import sanitize_filename
 from novel_downloader.utils.network import download_image
 from novel_downloader.utils.text_utils import clean_chapter_title
@@ -34,6 +28,16 @@ from novel_downloader.utils.text_utils import clean_chapter_title
 if TYPE_CHECKING:
     from .main_exporter import CommonExporter
+_IMAGE_WRAPPER = (
+    '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
+)
+_IMG_TAG_PATTERN = re.compile(
+    r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
+)
+_RAW_HTML_RE = re.compile(
+    r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
+)
 def common_export_as_epub(
     exporter: CommonExporter,
@@ -71,12 +75,12 @@ def common_export_as_epub(
         return
     book_name = book_info.get("book_name", book_id)
+    book_author = book_info.get("author", "")
     exporter.logger.info(
         "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
     )
     # --- Generate intro + cover ---
-    intro_html = generate_book_intro_html(book_info)
     cover_path: Path | None = None
     cover_url = book_info.get("cover_url", "")
     if config.include_cover and cover_url:
@@ -90,49 +94,56 @@ def common_export_as_epub(
             exporter.logger.warning("Failed to download cover from %s", cover_url)
     # --- Initialize EPUB ---
-    book, spine, toc_list = init_epub(
-        book_info=book_info,
-        book_id=book_id,
-        intro_html=intro_html,
-        book_cover_path=cover_path,
-        include_toc=config.include_toc,
+    book = Book(
+        title=book_name,
+        author=book_author,
+        description=book_info.get("summary", ""),
+        cover_path=cover_path,
+        subject=book_info.get("subject", []),
+        serial_status=book_info.get("serial_status", ""),
+        word_count=book_info.get("word_count", ""),
+        uid=f"{exporter.site}_{book_id}",
+    )
+    main_css = StyleSheet(
+        id="main_style",
+        content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
+        filename="main.css",
     )
-    for css in create_css_items(
-        include_main=True,
-        include_volume=True,
-    ):
-        book.add_item(css)
+    book.add_stylesheet(main_css)
     # --- Compile chapters ---
     volumes = book_info.get("volumes", [])
     for vol_index, vol in enumerate(volumes, start=1):
-        raw_vol_name = vol.get("volume_name", "").strip()
-        vol_name = clean_chapter_title(raw_vol_name) or f"Unknown Volume {vol_index}"
+        raw_vol_name = vol.get("volume_name", "")
+        raw_vol_name = raw_vol_name.replace(book_name, "").strip()
+        vol_name = raw_vol_name or f"Volume {vol_index}"
         exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
-        # Volume intro
-        vol_intro = epub.EpubHtml(
+        vol_cover_path: Path | None = None
+        vol_cover_url = vol.get("volume_cover", "")
+        if vol_cover_url:
+            vol_cover_path = download_image(
+                vol_cover_url,
+                img_dir,
+                on_exist="skip",
+            )
+        curr_vol = Volume(
+            id=f"vol_{vol_index}",
             title=vol_name,
-            file_name=f"{EPUB_TEXT_FOLDER}/volume_intro_{vol_index}.xhtml",
-            lang="zh",
+            intro=vol.get("volume_intro", ""),
+            cover=vol_cover_path,
         )
-        vol_intro.content = create_volume_intro(vol_name, vol.get("volume_intro", ""))
-        vol_intro.add_link(
-            href="../Styles/volume-intro.css",
-            rel="stylesheet",
-            type="text/css",
-        )
-        book.add_item(vol_intro)
-        spine.append(vol_intro)
-        section = epub.Section(vol_name, vol_intro.file_name)
-        chapter_items: list[epub.EpubHtml] = []
         for chap in vol.get("chapters", []):
             chap_id = chap.get("chapterId")
             chap_title = chap.get("title", "")
             if not chap_id:
-                exporter.logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
+                exporter.logger.warning(
+                    "%s Missing chapterId, skipping: %s",
+                    TAG,
+                    chap,
+                )
                 continue
             chapter_data = exporter._get_chapter(book_id, chap_id)
@@ -147,36 +158,28 @@ def common_export_as_epub(
             title = clean_chapter_title(chapter_data.get("title", "")) or chap_id
             content: str = chapter_data.get("content", "")
-            content = inline_remote_images(content, img_dir)
-            chap_html = chapter_txt_to_html(
+            content, img_paths = _inline_remote_images(content, img_dir)
+            chap_html = _txt_to_html(
                 chapter_title=title,
                 chapter_text=content,
-                author_say=chapter_data.get("author_say", ""),
+                extras={
+                    "作者说": chapter_data.get("author_say", ""),
+                },
             )
-            chap_path = f"{EPUB_TEXT_FOLDER}/{chap_id}.xhtml"
-            item = epub.EpubHtml(title=chap_title, file_name=chap_path, lang="zh")
-            item.content = chap_html
-            item.add_link(
-                href="../Styles/main.css",
-                rel="stylesheet",
-                type="text/css",
+            curr_vol.add_chapter(
+                Chapter(
+                    id=f"c_{chap_id}",
+                    title=title,
+                    content=chap_html,
+                    css=[main_css],
+                )
             )
-            book.add_item(item)
-            spine.append(item)
-            chapter_items.append(item)
-        toc_list.append((section, chapter_items))
+            for img_path in img_paths:
+                book.add_image(img_path)
-    book = add_images_from_dir(book, img_dir)
+        book.add_volume(curr_vol)
     # --- 5. Finalize EPUB ---
-    exporter.logger.info("%s Building TOC and spine...", TAG)
-    book.toc = toc_list
-    book.spine = spine
-    book.add_item(epub.EpubNcx())
-    book.add_item(epub.EpubNav())
     out_name = exporter.get_filename(
         title=book_name,
         author=book_info.get("author"),
@@ -185,8 +188,90 @@ def common_export_as_epub(
     out_path = out_dir / sanitize_filename(out_name)
     try:
-        epub.write_epub(out_path, book, EPUB_OPTIONS)
+        book.export(out_path)
         exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
     except Exception as e:
         exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
     return
+def _inline_remote_images(
+    content: str,
+    image_dir: str | Path,
+) -> tuple[str, list[Path]]:
+    """
+    Download every remote `<img src="...">` in `content` into `image_dir`,
+    and replace the original tag with _IMAGE_WRAPPER
+    pointing to the local filename.
+    :param content: HTML/text of the chapter containing <img> tags.
+    :param image_dir: Directory to save downloaded images into.
+    :return: A tuple (modified_content, list_of_downloaded_image_paths).
+    """
+    downloaded_images: list[Path] = []
+    def _replace(match: re.Match[str]) -> str:
+        url = match.group(1)
+        try:
+            # download_image returns a Path or None
+            local_path = download_image(
+                url,
+                image_dir,
+                target_name=None,
+                on_exist="skip",
+            )
+            if not local_path:
+                return match.group(0)
+            downloaded_images.append(local_path)
+            return _IMAGE_WRAPPER.format(filename=local_path.name)
+        except Exception:
+            return match.group(0)
+    modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
+    return modified_content, downloaded_images
+def _txt_to_html(
+    chapter_title: str,
+    chapter_text: str,
+    extras: dict[str, str] | None = None,
+) -> str:
+    """
+    Convert chapter text and author note to styled HTML.
+    :param chapter_title: Title of the chapter.
+    :param chapter_text: Main content of the chapter.
+    :param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
+    :return: Rendered HTML as a string.
+    """
+    def _render_block(text: str) -> str:
+        lines = (line.strip() for line in text.splitlines() if line.strip())
+        out = []
+        for line in lines:
+            # preserve raw HTML, otherwise wrap in <p>
+            if _RAW_HTML_RE.match(line):
+                out.append(line)
+            else:
+                out.append(f"<p>{html.escape(line)}</p>")
+        return "\n".join(out)
+    parts = []
+    parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
+    parts.append(_render_block(chapter_text))
+    if extras:
+        for title, note in extras.items():
+            note = note.strip()
+            if not note:
+                continue
+            parts.extend(
+                [
+                    "<hr />",
+                    f"<p>{html.escape(title)}</p>",
+                    _render_block(note),
+                ]
+            )
+    return "\n".join(parts)

novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl

novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl