PyPI - novel-downloader - Versions diffs - 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (165) hide show

novel_downloader/__init__.py +1 -1
novel_downloader/cli/__init__.py +2 -2
novel_downloader/cli/config.py +1 -83
novel_downloader/cli/download.py +4 -5
novel_downloader/cli/export.py +4 -1
novel_downloader/cli/main.py +2 -0
novel_downloader/cli/search.py +123 -0
novel_downloader/config/__init__.py +3 -10
novel_downloader/config/adapter.py +190 -54
novel_downloader/config/loader.py +2 -3
novel_downloader/core/__init__.py +13 -13
novel_downloader/core/downloaders/__init__.py +10 -11
novel_downloader/core/downloaders/base.py +152 -26
novel_downloader/core/downloaders/biquge.py +5 -1
novel_downloader/core/downloaders/common.py +157 -378
novel_downloader/core/downloaders/esjzone.py +5 -1
novel_downloader/core/downloaders/linovelib.py +5 -1
novel_downloader/core/downloaders/qianbi.py +291 -4
novel_downloader/core/downloaders/qidian.py +199 -285
novel_downloader/core/downloaders/registry.py +67 -0
novel_downloader/core/downloaders/sfacg.py +5 -1
novel_downloader/core/downloaders/yamibo.py +5 -1
novel_downloader/core/exporters/__init__.py +10 -11
novel_downloader/core/exporters/base.py +87 -7
novel_downloader/core/exporters/biquge.py +5 -8
novel_downloader/core/exporters/common/__init__.py +2 -2
novel_downloader/core/exporters/common/epub.py +82 -166
novel_downloader/core/exporters/common/main_exporter.py +0 -60
novel_downloader/core/exporters/common/txt.py +82 -83
novel_downloader/core/exporters/epub_util.py +157 -1330
novel_downloader/core/exporters/esjzone.py +5 -8
novel_downloader/core/exporters/linovelib/__init__.py +2 -2
novel_downloader/core/exporters/linovelib/epub.py +157 -212
novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
novel_downloader/core/exporters/linovelib/txt.py +67 -63
novel_downloader/core/exporters/qianbi.py +5 -8
novel_downloader/core/exporters/qidian.py +14 -4
novel_downloader/core/exporters/registry.py +53 -0
novel_downloader/core/exporters/sfacg.py +5 -8
novel_downloader/core/exporters/txt_util.py +67 -0
novel_downloader/core/exporters/yamibo.py +5 -8
novel_downloader/core/fetchers/__init__.py +19 -24
novel_downloader/core/fetchers/base/__init__.py +3 -3
novel_downloader/core/fetchers/base/browser.py +23 -4
novel_downloader/core/fetchers/base/session.py +30 -5
novel_downloader/core/fetchers/biquge/__init__.py +3 -3
novel_downloader/core/fetchers/biquge/browser.py +5 -0
novel_downloader/core/fetchers/biquge/session.py +6 -1
novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
novel_downloader/core/fetchers/esjzone/browser.py +5 -0
novel_downloader/core/fetchers/esjzone/session.py +6 -1
novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
novel_downloader/core/fetchers/linovelib/browser.py +6 -1
novel_downloader/core/fetchers/linovelib/session.py +6 -1
novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
novel_downloader/core/fetchers/qianbi/browser.py +5 -0
novel_downloader/core/fetchers/qianbi/session.py +5 -0
novel_downloader/core/fetchers/qidian/__init__.py +3 -3
novel_downloader/core/fetchers/qidian/browser.py +12 -4
novel_downloader/core/fetchers/qidian/session.py +11 -3
novel_downloader/core/fetchers/registry.py +71 -0
novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
novel_downloader/core/fetchers/sfacg/browser.py +5 -0
novel_downloader/core/fetchers/sfacg/session.py +5 -0
novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
novel_downloader/core/fetchers/yamibo/browser.py +5 -0
novel_downloader/core/fetchers/yamibo/session.py +6 -1
novel_downloader/core/interfaces/__init__.py +7 -5
novel_downloader/core/interfaces/searcher.py +18 -0
novel_downloader/core/parsers/__init__.py +10 -11
novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
novel_downloader/core/parsers/qidian/__init__.py +2 -2
novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
novel_downloader/core/parsers/qidian/main_parser.py +10 -21
novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
novel_downloader/core/parsers/registry.py +68 -0
novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
novel_downloader/core/searchers/__init__.py +20 -0
novel_downloader/core/searchers/base.py +92 -0
novel_downloader/core/searchers/biquge.py +83 -0
novel_downloader/core/searchers/esjzone.py +84 -0
novel_downloader/core/searchers/qianbi.py +131 -0
novel_downloader/core/searchers/qidian.py +87 -0
novel_downloader/core/searchers/registry.py +63 -0
novel_downloader/locales/en.json +12 -4
novel_downloader/locales/zh.json +12 -4
novel_downloader/models/__init__.py +4 -30
novel_downloader/models/config.py +12 -6
novel_downloader/models/search.py +16 -0
novel_downloader/models/types.py +0 -2
novel_downloader/resources/config/settings.toml +31 -4
novel_downloader/resources/css_styles/intro.css +83 -0
novel_downloader/resources/css_styles/main.css +30 -89
novel_downloader/utils/__init__.py +52 -0
novel_downloader/utils/chapter_storage.py +244 -224
novel_downloader/utils/constants.py +1 -21
novel_downloader/utils/epub/__init__.py +34 -0
novel_downloader/utils/epub/builder.py +377 -0
novel_downloader/utils/epub/constants.py +77 -0
novel_downloader/utils/epub/documents.py +403 -0
novel_downloader/utils/epub/models.py +134 -0
novel_downloader/utils/epub/utils.py +212 -0
novel_downloader/utils/file_utils/__init__.py +10 -14
novel_downloader/utils/file_utils/io.py +20 -51
novel_downloader/utils/file_utils/normalize.py +2 -2
novel_downloader/utils/file_utils/sanitize.py +2 -3
novel_downloader/utils/fontocr/__init__.py +5 -5
novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
novel_downloader/utils/fontocr/ocr_v1.py +13 -1
novel_downloader/utils/fontocr/ocr_v2.py +13 -1
novel_downloader/utils/fontocr/ocr_v3.py +744 -0
novel_downloader/utils/i18n.py +2 -0
novel_downloader/utils/logger.py +2 -0
novel_downloader/utils/network.py +110 -251
novel_downloader/utils/state.py +1 -0
novel_downloader/utils/text_utils/__init__.py +18 -17
novel_downloader/utils/text_utils/diff_display.py +4 -5
novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
novel_downloader/utils/text_utils/text_cleaner.py +179 -0
novel_downloader/utils/text_utils/truncate_utils.py +62 -0
novel_downloader/utils/time_utils/__init__.py +3 -3
novel_downloader/utils/time_utils/datetime_utils.py +4 -5
novel_downloader/utils/time_utils/sleep_utils.py +2 -3
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
novel_downloader-1.5.0.dist-info/RECORD +164 -0
novel_downloader/config/site_rules.py +0 -94
novel_downloader/core/factory/__init__.py +0 -20
novel_downloader/core/factory/downloader.py +0 -73
novel_downloader/core/factory/exporter.py +0 -58
novel_downloader/core/factory/fetcher.py +0 -96
novel_downloader/core/factory/parser.py +0 -86
novel_downloader/core/fetchers/common/__init__.py +0 -14
novel_downloader/core/fetchers/common/browser.py +0 -79
novel_downloader/core/fetchers/common/session.py +0 -79
novel_downloader/core/parsers/biquge/__init__.py +0 -10
novel_downloader/core/parsers/common/__init__.py +0 -13
novel_downloader/core/parsers/common/helper.py +0 -323
novel_downloader/core/parsers/common/main_parser.py +0 -106
novel_downloader/core/parsers/esjzone/__init__.py +0 -10
novel_downloader/core/parsers/linovelib/__init__.py +0 -10
novel_downloader/core/parsers/qianbi/__init__.py +0 -10
novel_downloader/core/parsers/sfacg/__init__.py +0 -10
novel_downloader/core/parsers/yamibo/__init__.py +0 -10
novel_downloader/models/browser.py +0 -21
novel_downloader/models/site_rules.py +0 -99
novel_downloader/models/tasks.py +0 -33
novel_downloader/resources/css_styles/volume-intro.css +0 -56
novel_downloader/resources/json/replace_word_map.json +0 -4
novel_downloader/resources/text/blacklist.txt +0 -22
novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
novel_downloader/utils/text_utils/font_mapping.py +0 -28
novel_downloader/utils/text_utils/text_cleaning.py +0 -107
novel_downloader-1.4.5.dist-info/RECORD +0 -165
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
{novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0

novel_downloader/utils/chapter_storage.py CHANGED Viewed

@@ -14,21 +14,21 @@ import types
 from pathlib import Path
 from typing import Any, Self, cast
-from novel_downloader.models import (
-    ChapterDict,
-    SaveMode,
-    StorageBackend,
-)
-from .file_utils import save_as_json
+from novel_downloader.models import ChapterDict
 _CREATE_TABLE_SQL = """
-CREATE TABLE IF NOT EXISTS "{table}" (
-    id TEXT PRIMARY KEY,
-    title TEXT NOT NULL,
-    content TEXT NOT NULL,
-    extra TEXT NOT NULL
-)
+CREATE TABLE IF NOT EXISTS chapters (
+  id        TEXT    NOT NULL,
+  source_id INTEGER NOT NULL,
+  priority  INTEGER NOT NULL DEFAULT 1000,
+  title     TEXT    NOT NULL,
+  content   TEXT    NOT NULL,
+  extra     TEXT,
+  PRIMARY KEY (id, source_id)
+);
+CREATE INDEX IF NOT EXISTS
+idx_chapters_id_priority ON chapters(id, priority);
 """
@@ -36,276 +36,292 @@ class ChapterStorage:
     """
     Manage storage of chapters in JSON files or an SQLite database.
-    :param raw_base: Base directory or file path for storage.
-    :param namespace: Novel identifier (subfolder name or DB/table basename).
-    :param backend_type: "json" (default) or "sqlite".
+    Supports storing multiple versions of each chapter from different sources,
+    each with a defined priority for selecting the preferred version.
     """
     def __init__(
         self,
         raw_base: str | Path,
-        namespace: str,
-        backend_type: StorageBackend = "json",
-        *,
-        batch_size: int = 1,
+        priorities: dict[int, int],
     ) -> None:
-        self.raw_base = Path(raw_base)
-        self.namespace = namespace
-        self.backend = backend_type
-        self._batch_size = batch_size
-        self._pending = 0
+        """
+        Initialize storage for a specific book.
+        :param raw_base: Directory path where the SQLite file will be stored.
+        :param priorities: Mapping of source_id to priority value.
+                           Lower numbers indicate higher priority.
+                           E.X. {0: 10, 1: 100} means source 0 is preferred.
+        """
+        self._db_path = Path(raw_base) / "chapter_data.sqlite"
         self._conn: sqlite3.Connection | None = None
-        self._existing_ids: set[str] = set()
-        if self.backend == "json":
-            self._init_json()
-        else:
-            self._init_sql()
-    def _init_json(self) -> None:
-        """Prepare directory for JSON files."""
-        self._json_dir = self.raw_base / self.namespace
-        self._json_dir.mkdir(parents=True, exist_ok=True)
-        self._existing_ids = {p.stem for p in self._json_dir.glob("*.json")}
-    def _init_sql(self) -> None:
-        """Prepare SQLite connection and ensure table exists."""
-        self._db_path = self.raw_base / f"{self.namespace}.sqlite"
+        self._priorities = priorities
+        self._existing_ids: set[tuple[str, int]] = set()  # (chap_id, source_id)
+    def connect(self) -> None:
+        """
+        Open the SQLite connection, enable foreign keys,
+        create schema, register initial sources, and cache existing keys.
+        """
+        if self._conn:
+            return
         self._conn = sqlite3.connect(self._db_path)
-        stmt = _CREATE_TABLE_SQL.format(table=self.namespace)
-        self._conn.execute(stmt)
+        self._conn.row_factory = sqlite3.Row
+        self._conn.execute("PRAGMA foreign_keys = ON;")
+        self._conn.executescript(_CREATE_TABLE_SQL)
         self._conn.commit()
+        self._load_existing_keys()
-        cur = self._conn.execute(f'SELECT id FROM "{self.namespace}"')
-        self._existing_ids = {row[0] for row in cur.fetchall()}
-    def _json_path(self, chap_id: str) -> Path:
-        """Return Path for JSON file of given chapter ID."""
-        return self._json_dir / f"{chap_id}.json"
-    def exists(self, chap_id: str) -> bool:
+    def exists(
+        self,
+        chap_id: str,
+        source_id: int | None = None,
+    ) -> bool:
         """
         Check if a chapter exists.
         :param chap_id: Chapter identifier.
+        :param source_id: If provided, check existence for that source.
         :return: True if found, else False.
         """
-        return chap_id in self._existing_ids
+        if source_id is not None:
+            return (chap_id, source_id) in self._existing_ids
+        return any(key[0] == chap_id for key in self._existing_ids)
-    def _load_json(self, chap_id: str) -> ChapterDict:
-        raw = self._json_path(chap_id).read_text(encoding="utf-8")
-        return cast(ChapterDict, json.loads(raw))
-    def _load_sql(self, chap_id: str) -> ChapterDict:
-        if self._conn is None:
-            raise RuntimeError("ChapterStorage is closed")
-        cur = self._conn.execute(
-            f'SELECT id, title, content, extra FROM "{self.namespace}" WHERE id = ?',
-            (chap_id,),
-        )
-        row = cur.fetchone()
-        return {
-            "id": row[0],
-            "title": row[1],
-            "content": row[2],
-            "extra": json.loads(row[3]),
-        }
-    def get(self, chap_id: str) -> ChapterDict | dict[str, Any]:
+    def upsert_chapter(
+        self,
+        data: ChapterDict,
+        source_id: int,
+    ) -> None:
         """
-        Retrieve chapter by ID.
+        Insert or update a single chapter record.
-        :param chap_id: Chapter identifier.
-        :return: ChapterDict if exists, else empty dict.
+        :param data: ChapterDict containing id, title, content, extra.
+        :param source_id: Integer index of source.
         """
-        if not self.exists(chap_id):
-            return {}
-        return (
-            self._load_json(chap_id)
-            if self.backend == "json"
-            else self._load_sql(chap_id)
+        priority = self._priorities[source_id]
+        chap_id = data["id"]
+        title = data["title"]
+        content = data["content"]
+        extra_json = json.dumps(data["extra"])
+        self.conn.execute(
+            """
+            INSERT OR REPLACE INTO chapters
+              (id, source_id, priority, title, content, extra)
+            VALUES (?, ?, ?, ?, ?, ?)
+            """,
+            (chap_id, source_id, priority, title, content, extra_json),
         )
+        self._existing_ids.add((chap_id, source_id))
+        self.conn.commit()
-    def _save_json(self, data: ChapterDict, on_exist: SaveMode) -> None:
-        path = self._json_path(data["id"])
-        save_as_json(data, path, on_exist=on_exist)
-        self._existing_ids.add(data["id"])
-    def _save_sql(self, data: ChapterDict, on_exist: SaveMode) -> None:
-        if self._conn is None:
-            raise RuntimeError("ChapterStorage is closed")
-        sql = (
-            f'INSERT OR REPLACE INTO "{self.namespace}" '
-            "(id, title, content, extra) VALUES (?, ?, ?, ?)"
-            if on_exist == "overwrite"
-            else f'INSERT OR IGNORE INTO "{self.namespace}" '
-            "(id, title, content, extra) VALUES (?, ?, ?, ?)"
-        )
-        self._conn.execute(
-            sql,
-            (
-                data["id"],
-                data["title"],
-                data["content"],
-                json.dumps(data["extra"], ensure_ascii=False),
-            ),
-        )
-        self._existing_ids.add(data["id"])
-        if self._batch_size == 1:
-            self._conn.commit()
-        else:
-            self._pending += 1
-            if self._pending >= self._batch_size:
-                self._conn.commit()
-                self._pending = 0
-    def _save_many_sql(
+    def upsert_chapters(
         self,
-        datas: list[ChapterDict],
-        on_exist: SaveMode = "overwrite",
+        data: list[ChapterDict],
+        source_id: int,
     ) -> None:
         """
-        Bulk-insert into SQLite using executemany + one commit.
+        Insert or update multiple chapters in one batch operation.
-        :param datas: List of ChapterDict to store.
-        :param on_exist: "overwrite" to REPLACE, "skip" to IGNORE on conflicts.
+        :param data: List of ChapterDicts.
+        :param source_id: Integer index of source.
         """
-        if on_exist not in ("overwrite", "skip"):
-            raise ValueError(f"invalid on_exist mode: {on_exist!r}")
-        if self._conn is None:
-            raise RuntimeError("ChapterStorage is closed")
-        sql = (
-            f'INSERT OR REPLACE INTO "{self.namespace}" '
-            "(id, title, content, extra) VALUES (?, ?, ?, ?)"
-            if on_exist == "overwrite"
-            else f'INSERT OR IGNORE INTO "{self.namespace}" '
-            "(id, title, content, extra) VALUES (?, ?, ?, ?)"
+        priority = self._priorities[source_id]
+        records = []
+        for chapter in data:
+            chap_id = chapter["id"]
+            title = chapter["title"]
+            content = chapter["content"]
+            extra_json = json.dumps(chapter["extra"])
+            records.append((chap_id, source_id, priority, title, content, extra_json))
+            self._existing_ids.add((chap_id, source_id))
+        self.conn.executemany(
+            """
+            INSERT OR REPLACE INTO chapters
+              (id, source_id, priority, title, content, extra)
+            VALUES (?, ?, ?, ?, ?, ?)
+            """,
+            records,
         )
+        self.conn.commit()
-        params = [
-            (
-                data["id"],
-                data["title"],
-                data["content"],
-                json.dumps(data["extra"], ensure_ascii=False),
-            )
-            for data in datas
-        ]
-        with self._conn:
-            self._conn.executemany(sql, params)
-        self._existing_ids.update(data["id"] for data in datas)
-    def save(
+    def get_chapter(
         self,
-        data: ChapterDict,
-        on_exist: SaveMode = "overwrite",
-    ) -> None:
+        chap_id: str,
+        source_id: int,
+    ) -> ChapterDict | None:
         """
-        Save a chapter record.
+        Retrieve a single chapter by id and source.
-        :param data: ChapterDict to store.
-        :param on_exist: What to do if chap_id already exists
+        :param chap_id: Chapter identifier.
+        :param source_id: Integer index of source.
+        :return: A ChapterDict if found, else None.
         """
-        if on_exist not in ("overwrite", "skip"):
-            raise ValueError(f"invalid on_exist mode: {on_exist!r}")
-        if self.backend == "json":
-            self._save_json(data, on_exist)
-        else:
-            self._save_sql(data, on_exist)
+        cur = self.conn.execute(
+            """
+            SELECT title, content, extra
+              FROM chapters
+             WHERE id = ? AND source_id = ?
+             LIMIT 1
+            """,
+            (chap_id, source_id),
+        )
+        row = cur.fetchone()
+        if not row:
+            return None
+        return ChapterDict(
+            id=chap_id,
+            title=row["title"],
+            content=row["content"],
+            extra=self._load_dict(row["extra"]),
+        )
-    def save_many(
+    def get_chapters(
         self,
-        datas: list[ChapterDict],
-        on_exist: SaveMode = "overwrite",
-    ) -> None:
+        chap_ids: list[str],
+        source_id: int,
+    ) -> dict[str, ChapterDict | None]:
         """
-        Save multiple chapter records in one shot.
+        Retrieve multiple chapters by their ids for a given source in one query.
-        :param datas: List of ChapterDict to store.
-        :param on_exist: What to do if chap_id already exists.
+        :param chap_ids: List of chapter identifiers.
+        :param source_id: Integer index of source.
+        :return: A dict mapping chap_id to ChapterDict or None.
         """
-        if on_exist not in ("overwrite", "skip"):
-            raise ValueError(f"invalid on_exist mode: {on_exist!r}")
-        if self.backend == "json":
-            for data in datas:
-                self._save_json(data, on_exist)
-        else:
-            self._save_many_sql(datas, on_exist)
-    def list_ids(self) -> list[str]:
+        placeholders = ",".join("?" for _ in chap_ids)
+        query = f"""
+            SELECT id, title, content, extra
+              FROM chapters
+             WHERE id IN ({placeholders}) AND source_id = ?
         """
-        List all stored chapter IDs.
-        """
-        if self.backend == "json":
-            return [p.stem for p in self._json_dir.glob("*.json") if p.is_file()]
-        if self._conn is None:
-            raise RuntimeError("ChapterStorage is closed")
-        cur = self._conn.execute(f'SELECT id FROM "{self.namespace}"')
-        return [row[0] for row in cur.fetchall()]
+        rows = self.conn.execute(query, (*chap_ids, source_id)).fetchall()
+        result: dict[str, ChapterDict | None] = {cid: None for cid in chap_ids}
+        for row in rows:
+            result[row["id"]] = ChapterDict(
+                id=row["id"],
+                title=row["title"],
+                content=row["content"],
+                extra=self._load_dict(row["extra"]),
+            )
+        return result
-    def delete(self, chap_id: str) -> bool:
+    def get_best_chapter(
+        self,
+        chap_id: str,
+    ) -> ChapterDict | None:
         """
-        Delete a chapter by ID.
-        :param chap_id: Chapter identifier.
-        :return: True if deleted, False if not found.
+        Retrieve the chapter with the highest priority (lowest priority number)
+        among all sources for the given chap_id.
         """
-        if not self.exists(chap_id):
-            return False
-        if self.backend == "json":
-            self._json_path(chap_id).unlink()
-            return True
-        if self._conn is None:
-            raise RuntimeError("ChapterStorage is closed")
-        cur = self._conn.execute(
-            f'DELETE FROM "{self.namespace}" WHERE id = ?', (chap_id,)
+        cur = self.conn.execute(
+            """
+            SELECT title, content, extra
+              FROM chapters
+             WHERE id = ?
+             ORDER BY priority ASC
+             LIMIT 1
+            """,
+            (chap_id,),
+        )
+        row = cur.fetchone()
+        if not row:
+            return None
+        return ChapterDict(
+            id=chap_id,
+            title=row["title"],
+            content=row["content"],
+            extra=self._load_dict(row["extra"]),
         )
-        self._conn.commit()
-        return cur.rowcount > 0
-    def count(self) -> int:
+    def get_best_chapters(
+        self,
+        chap_ids: list[str],
+    ) -> dict[str, ChapterDict | None]:
         """
-        Count total chapters stored.
+        Retrieve the best (highest-priority) chapter for each given id
+        in a single query using window functions.
         """
-        if self.backend == "json":
-            return len(self.list_ids())
-        if self._conn is None:
-            raise RuntimeError("ChapterStorage is closed")
-        cur = self._conn.execute(f'SELECT COUNT(1) FROM "{self.namespace}"')
-        return int(cur.fetchone()[0])
+        placeholders = ",".join("?" for _ in chap_ids)
+        query = f"""
+            SELECT chap_id, title, content, extra FROM (
+              SELECT id AS chap_id, title, content, extra,
+                     ROW_NUMBER() OVER (
+                       PARTITION BY id ORDER BY priority ASC
+                     ) AS rn
+                FROM chapters
+               WHERE id IN ({placeholders})
+            ) sub
+            WHERE rn = 1
+        """
+        rows = self.conn.execute(query, chap_ids).fetchall()
+        result: dict[str, ChapterDict | None] = {chap_id: None for chap_id in chap_ids}
+        for row in rows:
+            result[row["chap_id"]] = ChapterDict(
+                id=row["chap_id"],
+                title=row["title"],
+                content=row["content"],
+                extra=self._load_dict(row["extra"]),
+            )
+        return result
-    def flush(self) -> None:
+    def count(self) -> int:
         """
-        Write out any leftover rows (< batch_size) at the end.
+        Count total chapters stored.
         """
-        if self._conn is not None and self._pending > 0:
-            self._conn.commit()
-            self._pending = 0
+        return len(self._existing_ids)
     def close(self) -> None:
         """
         Gracefully close any open resources.
         """
-        if self.backend != "sqlite" or self._conn is None:
+        if self._conn is None:
             return
-        with contextlib.suppress(Exception):
-            self.flush()
         with contextlib.suppress(Exception):
             self._conn.close()
         self._conn = None
+        self._existing_ids = set()
+    @property
+    def conn(self) -> sqlite3.Connection:
+        """
+        Return the active SQLite connection, or raise if not connected.
+        :raises RuntimeError: if connect() has not been called.
+        """
+        if self._conn is None:
+            raise RuntimeError(
+                "Database connection is not established. Call connect() first."
+            )
+        return self._conn
+    def _load_existing_keys(self) -> None:
+        """
+        Cache all existing (chapter_id, source_id) pairs for fast upsert.
+        """
+        cur = self.conn.execute("SELECT id, source_id FROM chapters")
+        self._existing_ids = {(row["id"], row["source_id"]) for row in cur.fetchall()}
+    @staticmethod
+    def _load_dict(data: str) -> dict[str, Any]:
+        try:
+            parsed = json.loads(data)
+            return cast(dict[str, Any], parsed)
+        except Exception:
+            return {}
     def __enter__(self) -> Self:
+        """
+        Enter context manager, automatically connecting to the database.
+        """
+        self.connect()
         return self
     def __exit__(
@@ -314,14 +330,18 @@ class ChapterStorage:
         exc_val: BaseException | None,
         tb: types.TracebackType | None,
     ) -> None:
+        """
+        Exit context manager, closing the database connection.
+        """
         self.close()
     def __del__(self) -> None:
+        """
+        Ensure the database connection is closed upon object deletion.
+        """
         self.close()
     def __repr__(self) -> str:
         return (
-            f"<ChapterStorage ns='{self.namespace}' "
-            f"backend='{self.backend}' "
-            f"path='{self.raw_base}'>"
+            f"<ChapterStorage priorities='{self._priorities}' path='{self._db_path}'>"
         )

novel_downloader/utils/constants.py CHANGED Viewed

@@ -19,16 +19,6 @@ APP_NAME = "NovelDownloader"  # Display name
 APP_DIR_NAME = "novel_downloader"  # Directory name for platformdirs
 LOGGER_NAME = PACKAGE_NAME  # Root logger name
-SUPPORTED_SITES = {
-    "biquge",
-    "esjzone",
-    "linovelib",
-    "qianbi",
-    "qidian",
-    "sfacg",
-    "yamibo",
-}
 # -----------------------------------------------------------------------------
 # Base directories
 # -----------------------------------------------------------------------------
@@ -49,9 +39,7 @@ MODEL_CACHE_DIR = BASE_CONFIG_DIR / "models"
 # Default file paths
 # -----------------------------------------------------------------------------
 STATE_FILE = DATA_DIR / "state.json"
-HASH_STORE_FILE = DATA_DIR / "image_hashes.json"
 SETTING_FILE = CONFIG_DIR / "settings.json"
-SITE_RULES_FILE = CONFIG_DIR / "site_rules.json"
 DEFAULT_USER_DATA_DIR = DATA_DIR / "browser_data"
@@ -91,9 +79,7 @@ DEFAULT_SETTINGS_PATHS = [
 # CSS Styles
 CSS_MAIN_PATH = files("novel_downloader.resources.css_styles").joinpath("main.css")
-CSS_VOLUME_INTRO_PATH = files("novel_downloader.resources.css_styles").joinpath(
-    "volume-intro.css"
-)
+CSS_INTRO_PATH = files("novel_downloader.resources.css_styles").joinpath("intro.css")
 # Images
 VOLUME_BORDER_IMAGE_PATH = files("novel_downloader.resources.images").joinpath(
@@ -101,9 +87,6 @@ VOLUME_BORDER_IMAGE_PATH = files("novel_downloader.resources.images").joinpath(
 )
 # JSON
-REPLACE_WORD_MAP_PATH = files("novel_downloader.resources.json").joinpath(
-    "replace_word_map.json"
-)
 LINOVELIB_FONT_MAP_PATH = files("novel_downloader.resources.json").joinpath(
     "linovelib_font_map.json"
 )
@@ -113,9 +96,6 @@ QD_DECRYPT_SCRIPT_PATH = files("novel_downloader.resources.js_scripts").joinpath
     "qidian_decrypt_node.js"
 )
-# Text Files
-BLACKLIST_PATH = files("novel_downloader.resources.text").joinpath("blacklist.txt")
 # ---------------------------------------------------------------------
 # Pretrained model registry (e.g. used in font recovery or OCR)
 # ---------------------------------------------------------------------

novel_downloader/utils/epub/__init__.py ADDED Viewed

@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+"""
+novel_downloader.utils.epub
+---------------------------
+Top-level package for EPUB export utilities.
+Key components:
+- EpubBuilder : orchestrates metadata, manifest, spine, navigation, and resources
+- Chapter, Volume : represent and render content sections and volume intros
+Usage example:
+```python
+builder = EpubBuilder(title="My Novel", author="Author Name", uid="uuid-1234")
+builder.add_chapter(Chapter(id="ch1", title="Chapter 1", content="<p>xxx</p>"))
+builder.export("output/my_novel.epub")
+```
+"""
+__all__ = [
+    "EpubBuilder",
+    "Chapter",
+    "Volume",
+    "StyleSheet",
+]
+from .builder import EpubBuilder
+from .models import (
+    Chapter,
+    StyleSheet,
+    Volume,
+)

novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl